From 8b1676e3b7bca183fea725d38d62350d2688078b Mon Sep 17 00:00:00 2001 From: Jun-ichi Takimoto Date: Sat, 12 Sep 2015 20:35:56 +0900 Subject: 36491: Multibyte support for parameter expansion flags B,E,N --- Src/glob.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'Src/glob.c') diff --git a/Src/glob.c b/Src/glob.c index dea1bf50e..43d135b99 100644 --- a/Src/glob.c +++ b/Src/glob.c @@ -2491,17 +2491,17 @@ get_match_ret(char *s, int b, int e, int fl, char *replstr, ll += 1 + (l - (e - b)); if (fl & SUB_BIND) { /* position of start of matched portion */ - sprintf(buf, "%d ", b + 1); + sprintf(buf, "%d ", MB_METASTRLEN2END(s, 0, s+b) + 1); ll += (bl = strlen(buf)); } if (fl & SUB_EIND) { /* position of end of matched portion */ - sprintf(buf + bl, "%d ", e + 1); + sprintf(buf + bl, "%d ", MB_METASTRLEN2END(s, 0, s+e) + 1); ll += (bl = strlen(buf)); } if (fl & SUB_LEN) { /* length of matched portion */ - sprintf(buf + bl, "%d ", e - b); + sprintf(buf + bl, "%d ", MB_METASTRLEN2END(s+b, 0, s+e)); ll += (bl = strlen(buf)); } if (bl) -- cgit v1.2.3 From bd5806aa0a98d8de6e1b6e7e4b7694b89952f08d Mon Sep 17 00:00:00 2001 From: Mikael Magnusson Date: Thu, 24 Sep 2015 20:51:59 +0200 Subject: 36603: glob: fix dirfd leak during Y shortcut qualifier --- ChangeLog | 11 +++++++---- Src/glob.c | 4 +++- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'Src/glob.c') diff --git a/ChangeLog b/ChangeLog index 80a7d19c4..41a56f966 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,12 +1,15 @@ +2015-09-24 Mikael Magnusson + + * 36603: Src/glob.c: fix dirfd leak during Y shortcut qualifier + 2015-09-24 Daniel Shahaf - * unposted: Completion/Unix/Command/_hg: _hg: Declare $expl as - an array + * unposted: Completion/Unix/Command/_hg: Declare $expl as an array 2015-09-24 Christoph Mathys - * 36613: Completion/Unix/Command/_hg: _hg: extend completion - for hg push to support branch and bookmark + * 36613: Completion/Unix/Command/_hg: extend completion for hg + 
push to support branch and bookmark 2015-09-23 Barton E. Schaefer diff --git a/Src/glob.c b/Src/glob.c index 43d135b99..fa3ce25f4 100644 --- a/Src/glob.c +++ b/Src/glob.c @@ -634,8 +634,10 @@ scanner(Complist q, int shortcircuit) } else { /* if the last filename component, just add it */ insert(fn, 1); - if (shortcircuit && shortcircuit == matchct) + if (shortcircuit && shortcircuit == matchct) { + closedir(lock); return; + } } } } -- cgit v1.2.3 From f9d7651c2554bb5db0373f63185ff358f795ab3c Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Mon, 28 Sep 2015 20:31:51 +0100 Subject: 36682: expand pattern interface to optimise unmetafication --- ChangeLog | 6 + Src/Zle/complist.c | 5 +- Src/Zle/zle_hist.c | 4 +- Src/glob.c | 56 +++++---- Src/pattern.c | 337 +++++++++++++++++++++++++++++++++++++---------------- Src/zsh.h | 10 ++ 6 files changed, 288 insertions(+), 130 deletions(-) (limited to 'Src/glob.c') diff --git a/ChangeLog b/ChangeLog index 546620d6b..38e0e4627 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2015-09-28 Peter Stephenson + + * 36682: Src/glob.c, Src/pattern.c, Src/zsh.h, + Src/Zle/complist,c, Src/Zle/zle_hist.c: expand pattern interface + to allow unmetafying trial string once for reuse. 
+ 2015-09-28 Daniel Shahaf * unposted: Test/D04parameter.ztst: Test for 36669 diff --git a/Src/Zle/complist.c b/Src/Zle/complist.c index 433701514..986ad31ea 100644 --- a/Src/Zle/complist.c +++ b/Src/Zle/complist.c @@ -868,7 +868,7 @@ putmatchcol(char *group, char *n) nrefs = MAX_POS - 1; if ((!pc->prog || !group || pattry(pc->prog, group)) && - pattryrefs(pc->pat, n, -1, -1, 0, &nrefs, begpos, endpos)) { + pattryrefs(pc->pat, n, -1, -1, NULL, 0, &nrefs, begpos, endpos)) { if (pc->cols[1]) { patcols = pc->cols; @@ -900,7 +900,8 @@ putfilecol(char *group, char *filename, mode_t m, int special) nrefs = MAX_POS - 1; if ((!pc->prog || !group || pattry(pc->prog, group)) && - pattryrefs(pc->pat, filename, -1, -1, 0, &nrefs, begpos, endpos)) { + pattryrefs(pc->pat, filename, -1, -1, NULL, + 0, &nrefs, begpos, endpos)) { if (pc->cols[1]) { patcols = pc->cols; diff --git a/Src/Zle/zle_hist.c b/Src/Zle/zle_hist.c index 0cff0391a..95d96c95c 100644 --- a/Src/Zle/zle_hist.c +++ b/Src/Zle/zle_hist.c @@ -1306,8 +1306,8 @@ doisearch(char **args, int dir, int pattern) * this mode. */ if (!skip_pos && - pattryrefs(patprog, zt, -1, -1, 0, NULL, NULL, - &end_pos)) + pattryrefs(patprog, zt, -1, -1, NULL, 0, + NULL, NULL, &end_pos)) t = zt; } else { if (!matchlist && !skip_pos) { diff --git a/Src/glob.c b/Src/glob.c index fa3ce25f4..8bf73520f 100644 --- a/Src/glob.c +++ b/Src/glob.c @@ -2780,7 +2780,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, p->flags &= ~(PAT_NOTSTART|PAT_NOTEND); if (fl & SUB_ALL) { - int i = matched && pattry(p, s); + int i = matched && pattrylen(p, s, -1, -1, NULL, 0); *sp = get_match_ret(*sp, 0, i ? l : 0, fl, i ? replstr : 0, NULL); if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i))) return 0; @@ -2809,7 +2809,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, * Largest/smallest possible match at head of string. * First get the longest match... 
*/ - if (pattry(p, s)) { + if (pattrylen(p, s, -1, -1, NULL, 0)) { /* patmatchlen returns metafied length, as we need */ int mlen = patmatchlen(); if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { @@ -2820,7 +2820,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, mb_charinit(); for (t = s, umlen = 0; t < s + mlen; ) { set_pat_end(p, *t); - if (pattrylen(p, s, t - s, umlen, 0)) { + if (pattrylen(p, s, t - s, umlen, NULL, 0)) { mlen = patmatchlen(); break; } @@ -2847,7 +2847,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, tmatch = NULL; for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) { set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, ioff)) + if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) tmatch = t; if (fl & SUB_START) break; @@ -2857,7 +2857,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, *sp = get_match_ret(*sp, tmatch - s, l, fl, replstr, NULL); return 1; } - if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, ioff)) { + if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, NULL, ioff)) { *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); return 1; } @@ -2870,7 +2870,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, mb_charinit(); for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) { set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, ioff)) { + if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) { *sp = get_match_ret(*sp, t-s, l, fl, replstr, NULL); return 1; } @@ -2878,7 +2878,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, break; umlen -= iincchar(&t); } - if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, ioff)) { + if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, NULL, ioff)) { *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); return 1; } @@ -2887,7 +2887,8 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, case SUB_SUBSTR: /* Smallest at start, but matching substrings. 
*/ set_pat_start(p, l); - if (!(fl & SUB_GLOBAL) && pattry(p, s + l) && !--n) { + if (!(fl & SUB_GLOBAL) && pattrylen(p, s + l, -1, -1, NULL, 0) && + !--n) { *sp = get_match_ret(*sp, 0, 0, fl, replstr, NULL); return 1; } /* fall through */ @@ -2908,7 +2909,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, for (; t < s + l; ioff++) { /* Find the longest match from this position. */ set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, ioff)) { + if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) { char *mpos = t + patmatchlen(); if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { char *ptr; @@ -2922,7 +2923,8 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, */ for (ptr = t, umlen2 = 0; ptr < mpos;) { set_pat_end(p, *ptr); - if (pattrylen(p, t, ptr - t, umlen2, ioff)) { + if (pattrylen(p, t, ptr - t, umlen2, + NULL, ioff)) { mpos = t + patmatchlen(); break; } @@ -2970,7 +2972,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, */ set_pat_start(p, l); if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG && - pattry(p, s + l) && !--n) { + pattrylen(p, s + l, -1, -1, NULL, 0) && !--n) { *sp = get_match_ret(*sp, 0, 0, fl, replstr, repllist); return 1; } @@ -2981,7 +2983,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, /* Longest/shortest at end, matching substrings. 
*/ if (!(fl & SUB_LONG)) { set_pat_start(p, l); - if (pattrylen(p, s + l, 0, 0, umltot) && !--n) { + if (pattrylen(p, s + l, 0, 0, NULL, umltot) && !--n) { *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); return 1; } @@ -3001,7 +3003,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, mb_charinit(); for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) { set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, ioff)) { + if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) { nmatches++; tmatch = t; } @@ -3017,7 +3019,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, mb_charinit(); for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) { set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, ioff) && + if (pattrylen(p, t, s + l - t, umlen, NULL, ioff) && !n--) { tmatch = t; break; @@ -3030,7 +3032,8 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { for (t = tmatch, umlen = 0; t < mpos; ) { set_pat_end(p, *t); - if (pattrylen(p, tmatch, t - tmatch, umlen, ioff)) { + if (pattrylen(p, tmatch, t - tmatch, umlen, + NULL, ioff)) { mpos = tmatch + patmatchlen(); break; } @@ -3042,7 +3045,8 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, return 1; } set_pat_start(p, l); - if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, umltot) && !--n) { + if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, NULL, umltot) && + !--n) { *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); return 1; } @@ -3167,7 +3171,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, */ for (t = s, umlen = 0; t < s + mlen; METAINC(t), umlen++) { set_pat_end(p, *t); - if (pattrylen(p, s, t - s, umlen, 0)) { + if (pattrylen(p, s, t - s, umlen, NULL, 0)) { mlen = patmatchlen(); break; } @@ -3187,7 +3191,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, if (t > s && t[-1] == Meta) t--; set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, ioff)) 
{ + if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) { *sp = get_match_ret(*sp, t - s, l, fl, replstr, NULL); return 1; } @@ -3203,7 +3207,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, for (ioff = 0, t = s, umlen = uml; t < s + l; ioff++, METAINC(t), umlen--) { set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, ioff)) { + if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) { *sp = get_match_ret(*sp, t-s, l, fl, replstr, NULL); return 1; } @@ -3235,7 +3239,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, for (; t < s + l; METAINC(t), ioff++, umlen--) { /* Find the longest match from this position. */ set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, ioff)) { + if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) { char *mpos = t + patmatchlen(); if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { char *ptr; @@ -3243,7 +3247,8 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, for (ptr = t, umlen2 = 0; ptr < mpos; METAINC(ptr), umlen2++) { set_pat_end(p, *ptr); - if (pattrylen(p, t, ptr - t, umlen2, ioff)) { + if (pattrylen(p, t, ptr - t, umlen2, + NULL, ioff)) { mpos = t + patmatchlen(); break; } @@ -3300,7 +3305,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, /* Longest/shortest at end, matching substrings. 
*/ if (!(fl & SUB_LONG)) { set_pat_start(p, l); - if (pattrylen(p, s + l, 0, 0, uml) && !--n) { + if (pattrylen(p, s + l, 0, 0, NULL, uml) && !--n) { *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); return 1; } @@ -3310,7 +3315,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, if (t > s && t[-1] == Meta) t--; set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, ioff) && !--n) { + if (pattrylen(p, t, s + l - t, umlen, NULL, ioff) && !--n) { /* Found the longest match */ char *mpos = t + patmatchlen(); if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { @@ -3319,7 +3324,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, for (ptr = t, umlen2 = 0; ptr < mpos; METAINC(ptr), umlen2++) { set_pat_end(p, *ptr); - if (pattrylen(p, t, ptr - t, umlen2, ioff)) { + if (pattrylen(p, t, ptr - t, umlen2, NULL, ioff)) { mpos = t + patmatchlen(); break; } @@ -3331,7 +3336,8 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, } } set_pat_start(p, l); - if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, uml) && !--n) { + if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, NULL, uml) && + !--n) { *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); return 1; } diff --git a/Src/pattern.c b/Src/pattern.c index af56bd9cc..03ba37d92 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -2022,6 +2022,131 @@ pattrystart(void) errsfound = 0; } +/* + * Allocate memory for pattern match. Note this is specific to use + * of pattern *and* trial string. + * + * Unmetafy a trial string for use in pattern matching, if needed. + * + * If it is needed, returns a zalloc()'d string; if not needed, returns + * NULL. + * + * prog is the pattern to be executed. + * string is the metafied trial string. + * stringlen is its length; it will be calculated if it's negative + * (this is a simple strlen()). + * unmetalen is the unmetafied length of the string, may be -1.
+ * force is 1 if we always unmetafy: this is useful if we are going + * to try again with different versions of the string. If this is + * called from pattryrefs() we don't force unmetafication as it won't + * be optimal. + * In patstralloc (supplied by caller, must last until last pattry is done) + * unmetalen is the unmetafied length of the string; it will be + * calculated if the input value is negative. + * unmetalenp is the unmetafied length of a path segment preceding + * the trial string needed for file management; it is calculated as + * needed so does not need to be initialised. + * alloced is the memory allocated --- same as return value from + * function. + */ +/**/ +mod_export +char *patallocstr(Patprog prog, char *string, int stringlen, int unmetalen, + int force, Patstralloc patstralloc) +{ + int needfullpath; + + /* + * For a top-level ~-exclusion, we will need the full + * path to exclude, so copy the path so far and append the + * current test string. + */ + needfullpath = (prog->flags & PAT_HAS_EXCLUDP) && pathpos; + + /* Get the length of the full string when unmetafied. */ + if (unmetalen < 0) + patstralloc->unmetalen = ztrsub(string + stringlen, string); + else + patstralloc->unmetalen = unmetalen; + if (needfullpath) { + patstralloc->unmetalenp = ztrsub(pathbuf + pathpos, pathbuf); + if (!patstralloc->unmetalenp) + needfullpath = 0; + } else + patstralloc->unmetalenp = 0; + /* Initialise cache area */ + patstralloc->progstrunmeta = NULL; + patstralloc->progstrunmetalen = 0; + + DPUTS(needfullpath && (prog->flags & (PAT_PURES|PAT_ANY)), + "rum sort of file exclusion"); + /* + * Partly for efficiency, and partly for the convenience of + * globbing, we don't unmetafy pure string patterns, and + * there's no reason to if the pattern is just a *.
+ */ + if (force || + (!(prog->flags & (PAT_PURES|PAT_ANY)) + && (needfullpath || patstralloc->unmetalen != stringlen))) { + /* + * We need to copy if we need to prepend the path so far + * (in which case we copy both chunks), or if we have + * Meta characters. + */ + char *dst, *ptr; + int i, icopy, ncopy; + + dst = patstralloc->alloced = + zalloc(patstralloc->unmetalen + patstralloc->unmetalenp); + + if (needfullpath) { + /* loop twice, copy path buffer first time */ + ptr = pathbuf; + ncopy = patstralloc->unmetalenp; + } else { + /* just loop once, copy string with unmetafication */ + ptr = string; + ncopy = patstralloc->unmetalen; + } + for (icopy = 0; icopy < 2; icopy++) { + for (i = 0; i < ncopy; i++) { + if (*ptr == Meta) { + ptr++; + *dst++ = *ptr++ ^ 32; + } else { + *dst++ = *ptr++; + } + } + if (!needfullpath) + break; + /* next time append test string to path so far */ + ptr = string; + ncopy = patstralloc->unmetalen; + } + } + else + { + patstralloc->alloced = NULL; + } + + return patstralloc->alloced; +} + + +/* + * Free memory allocated by patallocstr(). + */ + +/**/ +mod_export +void patfreestr(Patstralloc patstralloc) +{ + if (patstralloc->alloced) + zfree(patstralloc->alloced, + patstralloc->unmetalen + patstralloc->unmetalenp); +} + + /* * Test prog against null-terminated, metafied string. 
*/ @@ -2030,7 +2155,7 @@ pattrystart(void) mod_export int pattry(Patprog prog, char *string) { - return pattryrefs(prog, string, -1, -1, 0, NULL, NULL, NULL); + return pattryrefs(prog, string, -1, -1, NULL, 0, NULL, NULL, NULL); } /* @@ -2041,9 +2166,11 @@ pattry(Patprog prog, char *string) /**/ mod_export int -pattrylen(Patprog prog, char *string, int len, int unmetalen, int offset) +pattrylen(Patprog prog, char *string, int len, int unmetalen, + Patstralloc patstralloc, int offset) { - return pattryrefs(prog, string, len, unmetalen, offset, NULL, NULL, NULL); + return pattryrefs(prog, string, len, unmetalen, patstralloc, offset, + NULL, NULL, NULL); } /* @@ -2055,14 +2182,32 @@ pattrylen(Patprog prog, char *string, int len, int unmetalen, int offset) * there may be a severe penalty for this if a lot of matching is done * on one string. * - * offset is the position in the original string (not seen by + * If patstralloc is not NULL it is used to optimise unmetafication + * of a trial string that may be passed (or any substring may be passed) to + * pattryrefs multiple times for the same pattern (N.B. so patstralloc + * depends on both prog *and* the trial string). This should only be + * done if there is no path prefix (pathpos == 0) as otherwise the path + * buffer and unmetafied string may not match. To do this, + * patallocstr() is called (use force = 1 to ensure it is always + * unmetafied); patstralloc points to existing storage. When all + * pattern matching is done, patfreestr() is called. + * patstralloc->alloced and patstralloc->unmetalen contain the + * unmetafied string and its length. In that case, the rules for the + * earlier arguments change: + * - string is an unmetafied string + * - stringlen is its unmetafied (i.e. actual) length + * - unmetalenin is not used. + * string and stringlen may refer to arbitrary substrings of + * patstralloc->alloced without any internal modification to patstralloc.
+ * + * patoffset is the position in the original string (not seen by * the pattern module) at which we are trying to match. * This is added in to the positions recorded in patbeginp and patendp * when we are looking for substrings. Currently this only happens * in the parameter substitution code. * - * Note this is a character offset, i.e. a metafied character - * counts as 1. + * Note this is a character offset, i.e. a single possibly metafied and + * possibly multibyte character counts as 1. * * The last three arguments are used to report the positions for the * backreferences. On entry, *nump should contain the maximum number @@ -2075,14 +2220,15 @@ pattrylen(Patprog prog, char *string, int len, int unmetalen, int offset) /**/ mod_export int -pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, - int patoffset, +pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin, + Patstralloc patstralloc, int patoffset, int *nump, int *begp, int *endp) { - int i, maxnpos = 0, ret, needfullpath, unmetalenp; + int i, maxnpos = 0, ret; int origlen; - char **sp, **ep, *tryalloced, *ptr; + char **sp, **ep, *ptr; char *progstr = (char *)prog + prog->startoff; + struct patstralloc patstralloc_struct; if (nump) { maxnpos = *nump; @@ -2091,86 +2237,38 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, /* inherited from domatch, but why, exactly? */ if (*string == Nularg) { string++; - unmetalen--; + if (unmetalenin > 0) + unmetalenin--; + if (stringlen > 0) + stringlen--; } if (stringlen < 0) stringlen = strlen(string); origlen = stringlen; - patflags = prog->flags; - /* - * For a top-level ~-exclusion, we will need the full - * path to exclude, so copy the path so far and append the - * current test string. - */ - needfullpath = (patflags & PAT_HAS_EXCLUDP) && pathpos; - - /* Get the length of the full string when unmetafied. 
*/ - if (unmetalen < 0) - unmetalen = ztrsub(string + stringlen, string); - if (needfullpath) - unmetalenp = ztrsub(pathbuf + pathpos, pathbuf); - else - unmetalenp = 0; - - DPUTS(needfullpath && (patflags & (PAT_PURES|PAT_ANY)), - "rum sort of file exclusion"); - /* - * Partly for efficiency, and partly for the convenience of - * globbing, we don't unmetafy pure string patterns, and - * there's no reason to if the pattern is just a *. - */ - if (!(patflags & (PAT_PURES|PAT_ANY)) - && (needfullpath || unmetalen != stringlen)) { - /* - * We need to copy if we need to prepend the path so far - * (in which case we copy both chunks), or if we have - * Meta characters. - */ - char *dst; - int icopy, ncopy; - - dst = tryalloced = zalloc(unmetalen + unmetalenp); - - if (needfullpath) { - /* loop twice, copy path buffer first time */ - ptr = pathbuf; - ncopy = unmetalenp; - } else { - /* just loop once, copy string with unmetafication */ - ptr = string; - ncopy = unmetalen; - } - for (icopy = 0; icopy < 2; icopy++) { - for (i = 0; i < ncopy; i++) { - if (*ptr == Meta) { - ptr++; - *dst++ = *ptr++ ^ 32; - } else { - *dst++ = *ptr++; - } - } - if (!needfullpath) - break; - /* next time append test string to path so far */ - ptr = string; - ncopy = unmetalen; - } - - if (needfullpath) { - patinstart = tryalloced + unmetalenp; - patinpath = tryalloced; - } else { - patinstart = tryalloced; - patinpath = NULL; - } - stringlen = unmetalen; - } else { + if (patstralloc) { + DPUTS(!patstralloc->alloced, + "External unmetafy didn't actually unmetafy."); + DPUTS(patstralloc->unmetalenp, + "Ooh-err: pathpos with external unmetafy. 
I have bad vibes."); + patinpath = NULL; patinstart = string; - tryalloced = patinpath = NULL; + /* stringlen is unmetafied length; unmetalenin is ignored */ + } else { + patstralloc = &patstralloc_struct; + if (patallocstr(prog, string, stringlen, unmetalenin, 0, patstralloc)) { + patinstart = patstralloc->alloced + patstralloc->unmetalenp; + stringlen = patstralloc->unmetalen; + } else + patinstart = string; + if (patstralloc->unmetalenp) + patinpath = patstralloc->alloced; + else + patinpath = NULL; } + patflags = prog->flags; patinend = patinstart + stringlen; /* * From now on we do not require NULL termination of @@ -2183,7 +2281,30 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, * Either we are testing against a pure string, * or we can match anything at all. */ - int ret; + int ret, pstrlen; + char *pstr; + if (patstralloc->alloced) + { + /* + * Unmetafied; we need a pattern string that's also unmetafied. + * We'll cache it in the patstralloc structure. + * Note it's on the heap. + */ + if (!patstralloc->progstrunmeta) + { + patstralloc->progstrunmeta = dupstring(progstr); + unmetafy(patstralloc->progstrunmeta, + &patstralloc->progstrunmetalen); + } + pstr = patstralloc->progstrunmeta; + pstrlen = patstralloc->progstrunmetalen; + } + else + { + /* Metafied. */ + pstr = progstr; + pstrlen = (int)prog->patmlen; + } if (prog->flags & PAT_ANY) { /* * Optimisation for a single "*": always matches @@ -2195,11 +2316,11 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, * Testing a pure string. See if initial * components match. */ - int lendiff = stringlen - prog->patmlen; + int lendiff = stringlen - pstrlen; if (lendiff < 0) { /* No, the pattern string is too long. */ ret = 0; - } else if (!memcmp(progstr, patinstart, prog->patmlen)) { + } else if (!memcmp(pstr, patinstart, pstrlen)) { /* * Initial component matches.
Matches either * if lengths are the same or we are not anchored @@ -2221,7 +2342,9 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, } else { /* * Remember the length in case used for ${..#..} etc. - * In this case, we didn't unmetafy the string. + * In this case, we didn't unmetafy the pattern string + * in the original structure, but it might be unmetafied + * for use with an unmetafied test string. */ patinlen = (int)prog->patmlen; /* if matching files, must update globbing flags */ @@ -2229,16 +2352,26 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, if ((patglobflags & GF_MATCHREF) && !(patflags & PAT_FILE)) { - char *str = ztrduppfx(patinstart, patinlen); + char *str; int mlen; - /* - * Count the characters. We're not using CHARSUB() - * because the string is still metafied. - */ - MB_METACHARINIT(); - mlen = MB_METASTRLEN2END(patinstart, 0, - patinstart + patinlen); + if (patstralloc->alloced) { + /* + * Unmetafied: pstrlen contains unmetafied + * length in bytes. + */ + str = metafy(patinstart, pstrlen, META_ALLOC); + mlen = CHARSUB(patinstart, patinstart + pstrlen); + } else { + str = ztrduppfx(patinstart, patinlen); + /* + * Count the characters. We're not using CHARSUB() + * because the string is still metafied.
+ */ + MB_METACHARINIT(); + mlen = MB_METASTRLEN2END(patinstart, 0, + patinstart + patinlen); + } setsparam("MATCH", str); setiparam("MBEGIN", @@ -2250,9 +2383,8 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, } } - if (tryalloced) - zfree(tryalloced, unmetalen + unmetalenp); - + if (patstralloc == &patstralloc_struct) + patfreestr(patstralloc); return ret; } else { int q = queue_signal_level(); @@ -2289,8 +2421,8 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, } } if (!ret) { - if (tryalloced) - zfree(tryalloced, unmetalen + unmetalenp); + if (patstralloc == &patstralloc_struct) + patfreestr(patstralloc); return 0; } @@ -2322,8 +2454,11 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, /* * Optimization: if we didn't find any Meta characters * to begin with, we don't need to look for them now. + * Only do this if we did the unmetafication internally, + * since otherwise it's too hard to work out. */ - if (unmetalen != origlen) { + if (patstralloc == &patstralloc_struct && + patstralloc->unmetalen != origlen) { for (ptr = patinstart; ptr < patinput; ptr++) if (imeta(*ptr)) patinlen++; @@ -2444,8 +2579,8 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, restore_queue_signals(q); - if (tryalloced) - zfree(tryalloced, unmetalen + unmetalenp); + if (patstralloc == &patstralloc_struct) + patfreestr(patstralloc); return ret; } diff --git a/Src/zsh.h b/Src/zsh.h index dd0596116..32f2e0cb2 100644 --- a/Src/zsh.h +++ b/Src/zsh.h @@ -491,6 +491,7 @@ typedef struct options *Options; typedef struct optname *Optname; typedef struct param *Param; typedef struct paramdef *Paramdef; +typedef struct patstralloc *Patstralloc; typedef struct patprog *Patprog; typedef struct prepromptfn *Prepromptfn; typedef struct process *Process; @@ -1470,6 +1471,15 @@ struct patprog { char patstartch; }; +struct patstralloc { + int unmetalen; /* Unmetafied length of trial string */ + int unmetalenp; /*
Unmetafied length of path prefix. + If 0, no path prefix. */ + char *alloced; /* Allocated string, may be NULL */ + char *progstrunmeta; /* Unmetafied pure string in pattern, cached */ + int progstrunmetalen; /* Length of the foregoing */ +}; + /* Flags used in pattern matchers (Patprog) and passed down to patcompile */ #define PAT_FILE 0x0001 /* Pattern is a file name */ -- cgit v1.2.3 From d07783628e935daab518509db123141ceb535a28 Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Tue, 29 Sep 2015 19:06:43 +0100 Subject: 36700: unmetafy early for parameter match --- Src/glob.c | 436 ++++++++++++++++++++++++++++++++++------------------------ Src/pattern.c | 20 ++- Src/zsh.h | 26 ++++ 3 files changed, 294 insertions(+), 188 deletions(-) (limited to 'Src/glob.c') diff --git a/Src/glob.c b/Src/glob.c index 8bf73520f..0594f0a82 100644 --- a/Src/glob.c +++ b/Src/glob.c @@ -2450,29 +2450,46 @@ matchpat(char *a, char *b) /* please do not laugh at this code. */ /* Having found a match in getmatch, decide what part of string - * to return. The matched part starts b characters into string s - * and finishes e characters in: 0 <= b <= e <= strlen(s) + * to return. The matched part starts b characters into string imd->ustr + * and finishes e characters in: 0 <= b <= e <= imd->ulen on input * (yes, empty matches should work). - * fl is a set of the SUB_* matches defined in zsh.h from SUB_MATCH onwards; - * the lower parts are ignored. - * replstr is the replacement string for a substitution + * + * imd->flags is a set of the SUB_* matches defined in zsh.h from + * SUB_MATCH onwards; the lower parts are ignored. + * + * imd->replstr is the replacement string for a substitution + * + * imd->replstr is metafied and the values put in imd->repllist are metafied. 
*/ /**/ static char * -get_match_ret(char *s, int b, int e, int fl, char *replstr, - LinkList repllist) +get_match_ret(Imatchdata imd, int b, int e) { - char buf[80], *r, *p, *rr; - int ll = 0, l = strlen(s), bl = 0, t = 0, i; - + char buf[80], *r, *p, *rr, *replstr = imd->replstr; + int ll = 0, bl = 0, t = 0, add = 0, fl = imd->flags, i; + + /* Account for b and e referring to unmetafied string */ + for (p = imd->ustr; p < imd->ustr + b; p++) + if (imeta(*p)) + add++; + b += add; + for (; p < imd->ustr + e; p++) + if (imeta(*p)) + add++; + e += add; + for (; p < imd->ustr + imd->ulen; p++) + if (imeta(*p)) + add++; + + /* Everything now refers to metafied lengths. */ if (replstr || (fl & SUB_LIST)) { if (fl & SUB_DOSUBST) { replstr = dupstring(replstr); singsub(&replstr); untokenize(replstr); } - if ((fl & (SUB_GLOBAL|SUB_LIST)) && imd->repllist) { + if ((fl & (SUB_GLOBAL|SUB_LIST)) && imd->repllist) { /* We are replacing the chunk, just add this to the list */ Repldata rd = (Repldata) ((fl & SUB_LIST) ?
zalloc(sizeof(*rd)) : zhalloc(sizeof(*rd))); @@ -2480,30 +2497,32 @@ get_match_ret(char *s, int b, int e, int fl, char *replstr, rd->e = e; rd->replstr = replstr; if (fl & SUB_LIST) - zaddlinknode(repllist, rd); + zaddlinknode(imd->repllist, rd); else - addlinknode(repllist, rd); - return s; + addlinknode(imd->repllist, rd); + return imd->mstr; } ll += strlen(replstr); } if (fl & SUB_MATCH) /* matched portion */ ll += 1 + (e - b); if (fl & SUB_REST) /* unmatched portion */ - ll += 1 + (l - (e - b)); + ll += 1 + (imd->mlen - (e - b)); if (fl & SUB_BIND) { /* position of start of matched portion */ - sprintf(buf, "%d ", MB_METASTRLEN2END(s, 0, s+b) + 1); + sprintf(buf, "%d ", MB_METASTRLEN2END(imd->mstr, 0, imd->mstr+b) + 1); ll += (bl = strlen(buf)); } if (fl & SUB_EIND) { /* position of end of matched portion */ - sprintf(buf + bl, "%d ", MB_METASTRLEN2END(s, 0, s+e) + 1); + sprintf(buf + bl, "%d ", + MB_METASTRLEN2END(imd->mstr, 0, imd->mstr+e) + 1); ll += (bl = strlen(buf)); } if (fl & SUB_LEN) { /* length of matched portion */ - sprintf(buf + bl, "%d ", MB_METASTRLEN2END(s+b, 0, s+e)); + sprintf(buf + bl, "%d ", MB_METASTRLEN2END(imd->mstr+b, 0, + imd->mstr+e)); ll += (bl = strlen(buf)); } if (bl) @@ -2513,7 +2532,7 @@ get_match_ret(char *s, int b, int e, int fl, char *replstr, if (fl & SUB_MATCH) { /* copy matched portion to new buffer */ - for (i = b, p = s + b; i < e; i++) + for (i = b, p = imd->mstr + b; i < e; i++) *rr++ = *p++; t = 1; } @@ -2523,12 +2542,12 @@ get_match_ret(char *s, int b, int e, int fl, char *replstr, if (t) *rr++ = ' '; /* there may be unmatched bits at both beginning and end of string */ - for (i = 0, p = s; i < b; i++) + for (i = 0, p = imd->mstr; i < b; i++) *rr++ = *p++; if (replstr) for (p = replstr; *p; ) *rr++ = *p++; - for (i = e, p = s + e; i < l; i++) + for (i = e, p = imd->mstr + e; i < imd->mlen; i++) *rr++ = *p++; t = 1; } @@ -2710,26 +2729,18 @@ set_pat_end(Patprog p, char null_me) /* * Increment *tp over character which 
may be multibyte. - * Return number of bytes that remain in the character after unmetafication. + * Return number of bytes. + * All unmetafied here. */ /**/ -static int iincchar(char **tp) +static int iincchar(char **tp, int left) { char *t = *tp; - int mbclen = mb_metacharlenconv(t, NULL); - int umlen = 0; - - while (mbclen--) { - umlen++; - if (*t++ == Meta) { - t++; - mbclen--; - } - } - *tp = t; + int mbclen = mb_charlenconv(t, left, NULL); + *tp = t + mbclen; - return umlen; + return mbclen; } /**/ @@ -2737,7 +2748,7 @@ static int igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, LinkList *repllistp) { - char *s = *sp, *t, *tmatch; + char *s = *sp, *t, *tmatch, *send; /* * Note that ioff counts (possibly multibyte) characters in the * character set (Meta's are not included), while l counts characters in @@ -2752,36 +2763,52 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, */ int ioff, l = strlen(*sp), matched = 1, umltot = ztrlen(*sp); int umlen, nmatches; - /* - * List of bits of matches to concatenate with replacement string. - * The data is a struct repldata. It is not used in cases like - * ${...//#foo/bar} even though SUB_GLOBAL is set, since the match - * is anchored. It goes on the heap. - */ - LinkList repllist = NULL; + struct patstralloc patstralloc; + struct imatchdata imd; + + (void)patallocstr(p, s, l, umltot, 1, &patstralloc); + s = patstralloc.alloced; + DPUTS(!s, "forced patallocstr failed"); + send = s + umltot; + + imd.mstr = *sp; + imd.mlen = l; + imd.ustr = s; + imd.ulen = umltot; + imd.flags = fl; + imd.replstr = replstr; + imd.repllist = NULL; /* perform must-match test for complex closures */ if (p->mustoff) { - /* - * Yuk. Probably we should rewrite this whole function to - * use an unmetafied test string. - * - * Use META_HEAPDUP because we need a terminating NULL. 
- */ - char *muststr = metafy((char *)p + p->mustoff, - p->patmlen, META_HEAPDUP); + char *muststr = (char *)p + p->mustoff; - if (!strstr(s, muststr)) - matched = 0; + matched = 0; + if (p->patmlen <= umltot) + { + for (t = s; t <= send - p->patmlen; t++) + { + if (!memcmp(muststr, t, p->patmlen)) { + matched = 1; + break; + } + } + } } /* in case we used the prog before... */ p->flags &= ~(PAT_NOTSTART|PAT_NOTEND); if (fl & SUB_ALL) { - int i = matched && pattrylen(p, s, -1, -1, NULL, 0); - *sp = get_match_ret(*sp, 0, i ? l : 0, fl, i ? replstr : 0, NULL); + int i = matched && pattrylen(p, s, umltot, 0, &patstralloc, 0); + if (!i) { + /* Perform under no-match conditions */ + umltot = 0; + imd.replstr = NULL; + } + *sp = get_match_ret(&imd, 0, umltot); + patfreestr(&patstralloc); if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i))) return 0; return 1; @@ -2809,25 +2836,27 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, * Largest/smallest possible match at head of string. * First get the longest match... */ - if (pattrylen(p, s, -1, -1, NULL, 0)) { - /* patmatchlen returns metafied length, as we need */ + if (pattrylen(p, s, umltot, 0, &patstralloc, 0)) { + /* patmatchlen returns unmetafied length in this case */ int mlen = patmatchlen(); if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { + send = s + mlen; /* * ... now we know whether it's worth looking for the * shortest, which we do by brute force. 
*/ mb_charinit(); - for (t = s, umlen = 0; t < s + mlen; ) { + for (t = s, umlen = 0; t < send; ) { set_pat_end(p, *t); - if (pattrylen(p, s, t - s, umlen, NULL, 0)) { + if (pattrylen(p, s, umlen, 0, &patstralloc, 0)) { mlen = patmatchlen(); break; } - umlen += iincchar(&t); + umlen += iincchar(&t, send - t); } } - *sp = get_match_ret(*sp, 0, mlen, fl, replstr, NULL); + *sp = get_match_ret(&imd, 0, mlen); + patfreestr(&patstralloc); return 1; } break; @@ -2845,20 +2874,23 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, */ mb_charinit(); tmatch = NULL; - for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) { + for (ioff = 0, t = s, umlen = umltot; t < send; ioff++) { set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) + if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) tmatch = t; if (fl & SUB_START) break; - umlen -= iincchar(&t); + umlen -= iincchar(&t, send - t); } if (tmatch) { - *sp = get_match_ret(*sp, tmatch - s, l, fl, replstr, NULL); + *sp = get_match_ret(&imd, tmatch - s, umltot); + patfreestr(&patstralloc); return 1; } - if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, NULL, ioff)) { - *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); + if (!(fl & SUB_START) && pattrylen(p, s + umltot, 0, 0, + &patstralloc, ioff)) { + *sp = get_match_ret(&imd, umltot, umltot); + patfreestr(&patstralloc); return 1; } break; @@ -2868,18 +2900,21 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, * move forward along string until we get a match. * * Again there's no optimisation. 
*/ mb_charinit(); - for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) { + for (ioff = 0, t = s, umlen = umltot; t < send ; ioff++) { set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) { - *sp = get_match_ret(*sp, t-s, l, fl, replstr, NULL); + if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) { + *sp = get_match_ret(&imd, t-s, umltot); + patfreestr(&patstralloc); return 1; } if (fl & SUB_START) break; - umlen -= iincchar(&t); + umlen -= iincchar(&t, send - t); } - if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, NULL, ioff)) { - *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); + if (!(fl & SUB_START) && pattrylen(p, send, 0, 0, + &patstralloc, ioff)) { + *sp = get_match_ret(&imd, umltot, umltot); + patfreestr(&patstralloc); return 1; } break; @@ -2887,18 +2922,20 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, case SUB_SUBSTR: /* Smallest at start, but matching substrings. */ set_pat_start(p, l); - if (!(fl & SUB_GLOBAL) && pattrylen(p, s + l, -1, -1, NULL, 0) && + if (!(fl & SUB_GLOBAL) && + pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) { - *sp = get_match_ret(*sp, 0, 0, fl, replstr, NULL); + *sp = get_match_ret(&imd, 0, 0); + patfreestr(&patstralloc); return 1; } /* fall through */ case (SUB_SUBSTR|SUB_LONG): /* longest or smallest at start with substrings */ t = s; if (fl & SUB_GLOBAL) { - repllist = (fl & SUB_LIST) ? znewlinklist() : newlinklist(); + imd.repllist = (fl & SUB_LIST) ? znewlinklist() : newlinklist(); if (repllistp) - *repllistp = repllist; + *repllistp = imd.repllist; } ioff = 0; /* offset into string */ umlen = umltot; @@ -2906,10 +2943,10 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, do { /* loop over all matches for global substitution */ matched = 0; - for (; t < s + l; ioff++) { + for (; t < send; ioff++) { /* Find the longest match from this position. 
*/ set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) { + if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) { char *mpos = t + patmatchlen(); if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { char *ptr; @@ -2923,19 +2960,18 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, */ for (ptr = t, umlen2 = 0; ptr < mpos;) { set_pat_end(p, *ptr); - if (pattrylen(p, t, ptr - t, umlen2, - NULL, ioff)) { + if (pattrylen(p, t, umlen2, 0, + &patstralloc, ioff)) { mpos = t + patmatchlen(); break; } - umlen2 += iincchar(&ptr); + umlen2 += iincchar(&ptr, mpos - ptr); } } if (!--n || (n <= 0 && (fl & SUB_GLOBAL))) { - *sp = get_match_ret(*sp, t-s, mpos-s, fl, - replstr, repllist); + *sp = get_match_ret(&imd, t-s, mpos-s); if (mpos == t) - mpos += mb_metacharlenconv(mpos, NULL); + mpos += mb_charlenconv(mpos, send - mpos, NULL); } if (!(fl & SUB_GLOBAL)) { if (n) { @@ -2945,9 +2981,10 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, * the next character, even if it overlaps * with what we just found. */ - umlen -= iincchar(&t); + umlen -= iincchar(&t, send - t); continue; } else { + patfreestr(&patstralloc); return 1; } } @@ -2958,11 +2995,11 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, matched = 1; while (t < mpos) { ioff++; - umlen -= iincchar(&t); + umlen -= iincchar(&t, send - t); } break; } - umlen -= iincchar(&t); + umlen -= iincchar(&t, send - t); } } while (matched); /* @@ -2972,8 +3009,9 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, */ set_pat_start(p, l); if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG && - pattrylen(p, s + l, -1, -1, NULL, 0) && !--n) { - *sp = get_match_ret(*sp, 0, 0, fl, replstr, repllist); + pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) { + *sp = get_match_ret(&imd, 0, 0); + patfreestr(&patstralloc); return 1; } break; @@ -2983,8 +3021,10 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, /* Longest/shortest at end, matching substrings. 
*/ if (!(fl & SUB_LONG)) { set_pat_start(p, l); - if (pattrylen(p, s + l, 0, 0, NULL, umltot) && !--n) { - *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); + if (pattrylen(p, send, 0, 0, &patstralloc, umltot) && + !--n) { + *sp = get_match_ret(&imd, umltot, umltot); + patfreestr(&patstralloc); return 1; } } @@ -3001,13 +3041,13 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, nmatches = 0; tmatch = NULL; mb_charinit(); - for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) { + for (ioff = 0, t = s, umlen = umltot; t < send; ioff++) { set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) { + if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) { nmatches++; tmatch = t; } - umlen -= iincchar(&t); + umlen -= iincchar(&t, send - t); } if (nmatches) { char *mpos; @@ -3017,14 +3057,14 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, */ n = nmatches - n; mb_charinit(); - for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) { + for (ioff = 0, t = s, umlen = umltot; t < send; ioff++) { set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, NULL, ioff) && + if (pattrylen(p, t, umlen, 0, &patstralloc, ioff) && !n--) { tmatch = t; break; } - umlen -= iincchar(&t); + umlen -= iincchar(&t, send - t); } } mpos = tmatch + patmatchlen(); @@ -3032,29 +3072,31 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { for (t = tmatch, umlen = 0; t < mpos; ) { set_pat_end(p, *t); - if (pattrylen(p, tmatch, t - tmatch, umlen, - NULL, ioff)) { + if (pattrylen(p, tmatch, umlen, 0, + &patstralloc, ioff)) { mpos = tmatch + patmatchlen(); break; } - umlen += iincchar(&t); + umlen += iincchar(&t, mpos - t); } } - *sp = get_match_ret(*sp, tmatch-s, mpos-s, fl, - replstr, NULL); + *sp = get_match_ret(&imd, tmatch-s, mpos-s); + patfreestr(&patstralloc); return 1; } set_pat_start(p, l); - if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, NULL, umltot) && + if ((fl & 
SUB_LONG) && pattrylen(p, send, 0, 0, + &patstralloc, umltot) && !--n) { - *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); + *sp = get_match_ret(&imd, umltot, umltot); + patfreestr(&patstralloc); return 1; } break; } } - if (repllist && nonempty(repllist)) { + if (imd.repllist && nonempty(imd.repllist)) { /* Put all the bits of a global search and replace together. */ LinkNode nd; Repldata rd; @@ -3062,10 +3104,15 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, char *ptr, *start; int i; + /* + * Use metafied string again. + * Results from get_match_ret in repllist are all metafied. + */ + s = *sp; if (!(fl & SUB_LIST)) { lleft = 0; /* size of returned string */ - i = 0; /* start of last chunk we got from *sp */ - for (nd = firstnode(repllist); nd; incnode(nd)) { + i = 0; /* start of last chunk we got from *sp */ + for (nd = firstnode(imd.repllist); nd; incnode(nd)) { rd = (Repldata) getdata(nd); lleft += rd->b - i; /* previous chunk of *sp */ lleft += strlen(rd->replstr); /* the replaced bit */ @@ -3074,7 +3121,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, lleft += l - i; /* final chunk from *sp */ start = t = zhalloc(lleft+1); i = 0; - for (nd = firstnode(repllist); nd; incnode(nd)) { + for (nd = firstnode(imd.repllist); nd; incnode(nd)) { rd = (Repldata) getdata(nd); memcpy(t, s + i, rd->b - i); t += rd->b - i; @@ -3087,13 +3134,19 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, start[lleft] = '\0'; *sp = (char *)start; } + patfreestr(&patstralloc); return 1; } - if (fl & SUB_LIST) /* safety: don't think this can happen */ + if (fl & SUB_LIST) { /* safety: don't think this can happen */ + patfreestr(&patstralloc); return 0; + } /* munge the whole string: no match, so no replstr */ - *sp = get_match_ret(*sp, 0, 0, fl, 0, 0); + imd.replstr = NULL; + imd.repllist = NULL; + *sp = get_match_ret(&imd, 0, 0); + patfreestr(&patstralloc); return (fl & SUB_RETFAIL) ? 
0 : 1; } @@ -3111,7 +3164,7 @@ static int igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, LinkList *repllistp) { - char *s = *sp, *t; + char *s = *sp, *t, *send; /* * Note that ioff and uml count characters in the character * set (Meta's are not included), while l counts characters in the @@ -3119,36 +3172,48 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, * lengths. */ int ioff, l = strlen(*sp), uml = ztrlen(*sp), matched = 1, umlen; - /* - * List of bits of matches to concatenate with replacement string. - * The data is a struct repldata. It is not used in cases like - * ${...//#foo/bar} even though SUB_GLOBAL is set, since the match - * is anchored. It goes on the heap. - */ - LinkList repllist = NULL; + struct patstralloc patstralloc; + struct imatchdata imd; + + (void)patallocstr(p, s, l, uml, 1, &patstralloc); + s = patstralloc.alloced; + DPUTS(!s, "forced patallocstr failed"); + send = s + uml; + + imd.mstr = *sp; + imd.mlen = l; + imd.ustr = s; + imd.ulen = uml; + imd.flags = fl; + imd.replstr = replstr; + imd.repllist = NULL; /* perform must-match test for complex closures */ if (p->mustoff) { - /* - * Yuk. Probably we should rewrite this whole function to - * use an unmetafied test string. - * - * Use META_HEAPDUP because we need a terminating NULL. - */ - char *muststr = metafy((char *)p + p->mustoff, - p->patmlen, META_HEAPDUP); + char *muststr = (char *)p + p->mustoff; - if (!strstr(s, muststr)) - matched = 0; + matched = 0; + if (p->patmlen <= uml) + { + for (t = s; t <= send - p->patmlen; t++) + { + if (!memcmp(muststr, t, p->patmlen)) { + matched = 1; + break; + } + } + } } /* in case we used the prog before... */ p->flags &= ~(PAT_NOTSTART|PAT_NOTEND); if (fl & SUB_ALL) { - int i = matched && pattry(p, s); - *sp = get_match_ret(*sp, 0, i ? l : 0, fl, i ? replstr : 0, NULL); + int i = matched && pattrylen(p, s, uml, 0, &patstralloc, 0); + if (!i) + imd.replstr = NULL; + *sp = get_match_ret(&imd, 0, i ? l : 0); if (! 
**sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i))) return 0; return 1; @@ -3161,23 +3226,25 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, * Largest/smallest possible match at head of string. * First get the longest match... */ - if (pattry(p, s)) { + if (pattrylen(p, s, uml, 0, &patstralloc, 0)) { /* patmatchlen returns metafied length, as we need */ int mlen = patmatchlen(); if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { + send = s + mlen; /* * ... now we know whether it's worth looking for the * shortest, which we do by brute force. */ for (t = s, umlen = 0; t < s + mlen; METAINC(t), umlen++) { set_pat_end(p, *t); - if (pattrylen(p, s, t - s, umlen, NULL, 0)) { + if (pattrylen(p, s, umlen, 0, &patstralloc, 0)) { mlen = patmatchlen(); break; } } } - *sp = get_match_ret(*sp, 0, mlen, fl, replstr, NULL); + *sp = get_match_ret(&imd, 0, mlen); + patfreestr(&patstralloc); return 1; } break; @@ -3186,17 +3253,13 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, /* Smallest possible match at tail of string: * * move back down string until we get a match. * * There's no optimization here. */ - for (ioff = uml, t = s + l, umlen = 0; t >= s; + for (ioff = uml, t = send, umlen = 0; t >= s; t--, ioff--, umlen++) { - if (t > s && t[-1] == Meta) - t--; set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) { - *sp = get_match_ret(*sp, t - s, l, fl, replstr, NULL); + if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) { + *sp = get_match_ret(&imd, t - s, uml); return 1; } - if (t > s+1 && t[-2] == Meta) - t--; } break; @@ -3204,61 +3267,59 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, /* Largest possible match at tail of string: * * move forward along string until we get a match. * * Again there's no optimisation. 
*/ - for (ioff = 0, t = s, umlen = uml; t < s + l; - ioff++, METAINC(t), umlen--) { + for (ioff = 0, t = s, umlen = uml; t < send; + ioff++, t++, umlen--) { set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) { - *sp = get_match_ret(*sp, t-s, l, fl, replstr, NULL); + if (pattrylen(p, t, send - t, umlen, &patstralloc, ioff)) { + *sp = get_match_ret(&imd, t-s, uml); return 1; } - if (*t == Meta) - t++; } break; case SUB_SUBSTR: /* Smallest at start, but matching substrings. */ set_pat_start(p, l); - if (!(fl & SUB_GLOBAL) && pattry(p, s + l) && !--n) { - *sp = get_match_ret(*sp, 0, 0, fl, replstr, NULL); + if (!(fl & SUB_GLOBAL) && + pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) { + *sp = get_match_ret(&imd, 0, 0); return 1; } /* fall through */ case (SUB_SUBSTR|SUB_LONG): /* longest or smallest at start with substrings */ t = s; if (fl & SUB_GLOBAL) { - repllist = newlinklist(); + imd.repllist = newlinklist(); if (repllistp) - *repllistp = repllist; + *repllistp = imd.repllist; } ioff = 0; /* offset into string */ umlen = uml; do { /* loop over all matches for global substitution */ matched = 0; - for (; t < s + l; METAINC(t), ioff++, umlen--) { + for (; t < send; t++, ioff++, umlen--) { /* Find the longest match from this position. 
*/ set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, NULL, ioff)) { + if (pattrylen(p, t, send - t, umlen, &patstralloc, ioff)) { char *mpos = t + patmatchlen(); if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { char *ptr; int umlen2; for (ptr = t, umlen2 = 0; ptr < mpos; - METAINC(ptr), umlen2++) { + ptr++, umlen2++) { set_pat_end(p, *ptr); if (pattrylen(p, t, ptr - t, umlen2, - NULL, ioff)) { + &patstralloc, ioff)) { mpos = t + patmatchlen(); break; } } } if (!--n || (n <= 0 && (fl & SUB_GLOBAL))) { - *sp = get_match_ret(*sp, t-s, mpos-s, fl, - replstr, repllist); + *sp = get_match_ret(&imd, t-s, mpos-s); if (mpos == t) - METAINC(mpos); + mpos++; } if (!(fl & SUB_GLOBAL)) { if (n) { @@ -3278,13 +3339,13 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, * which is already marked for replacement. */ matched = 1; - for ( ; t < mpos; t++, ioff++, umlen--) - if (*t == Meta) - t++; + while (t < mpos) { + ioff++; + umlen--; + t++; + } break; } - if (*t == Meta) - t++; } } while (matched); /* @@ -3294,8 +3355,9 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, */ set_pat_start(p, l); if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG && - pattry(p, s + l) && !--n) { - *sp = get_match_ret(*sp, 0, 0, fl, replstr, repllist); + pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) { + *sp = get_match_ret(&imd, 0, 0); + patfreestr(&patstralloc); return 1; } break; @@ -3305,47 +3367,50 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, /* Longest/shortest at end, matching substrings. 
*/ if (!(fl & SUB_LONG)) { set_pat_start(p, l); - if (pattrylen(p, s + l, 0, 0, NULL, uml) && !--n) { - *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); + if (pattrylen(p, send, 0, 0, &patstralloc, uml) && !--n) { + *sp = get_match_ret(&imd, uml, uml); + patfreestr(&patstralloc); return 1; } } - for (ioff = uml - 1, t = s + l - 1, umlen = 1; t >= s; + for (ioff = uml - 1, t = send - 1, umlen = 1; t >= s; t--, ioff--, umlen++) { - if (t > s && t[-1] == Meta) - t--; set_pat_start(p, t-s); - if (pattrylen(p, t, s + l - t, umlen, NULL, ioff) && !--n) { + if (pattrylen(p, t, send - t, umlen, &patstralloc, ioff) && + !--n) { /* Found the longest match */ char *mpos = t + patmatchlen(); if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) { char *ptr; int umlen2; for (ptr = t, umlen2 = 0; ptr < mpos; - METAINC(ptr), umlen2++) { + ptr++, umlen2++) { set_pat_end(p, *ptr); - if (pattrylen(p, t, ptr - t, umlen2, NULL, ioff)) { + if (pattrylen(p, t, umlen2, 0, &patstralloc, + ioff)) { mpos = t + patmatchlen(); break; } } } - *sp = get_match_ret(*sp, t-s, mpos-s, fl, - replstr, NULL); + *sp = get_match_ret(&imd, t-s, mpos-s); + patfreestr(&patstralloc); return 1; } } set_pat_start(p, l); - if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, NULL, uml) && + if ((fl & SUB_LONG) && pattrylen(p, send, 0, 0, + &patstralloc, uml) && !--n) { - *sp = get_match_ret(*sp, l, l, fl, replstr, NULL); + *sp = get_match_ret(&imd, uml, uml); + patfreestr(&patstralloc); return 1; } break; } } - if (repllist && nonempty(repllist)) { + if (imd.repllist && nonempty(imd.repllist)) { /* Put all the bits of a global search and replace together. */ LinkNode nd; Repldata rd; @@ -3353,8 +3418,13 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, char *ptr, *start; int i; + /* + * Use metafied string again. + * Results from get_match_ret in repllist are all metafied. 
+ */ + s = *sp; i = 0; /* start of last chunk we got from *sp */ - for (nd = firstnode(repllist); nd; incnode(nd)) { + for (nd = firstnode(imd.repllist); nd; incnode(nd)) { rd = (Repldata) getdata(nd); lleft += rd->b - i; /* previous chunk of *sp */ lleft += strlen(rd->replstr); /* the replaced bit */ @@ -3363,7 +3433,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, lleft += l - i; /* final chunk from *sp */ start = t = zhalloc(lleft+1); i = 0; - for (nd = firstnode(repllist); nd; incnode(nd)) { + for (nd = firstnode(imd.repllist); nd; incnode(nd)) { rd = (Repldata) getdata(nd); memcpy(t, s + i, rd->b - i); t += rd->b - i; @@ -3375,11 +3445,15 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, memcpy(t, s + i, l - i); start[lleft] = '\0'; *sp = (char *)start; + patfreestr(&patstralloc); return 1; } /* munge the whole string: no match, so no replstr */ - *sp = get_match_ret(*sp, 0, 0, fl, 0, 0); + imd.replstr = NULL; + imd.repllist = NULL; + *sp = get_match_ret(&imd, 0, 0); + patfreestr(&patstralloc); return 1; } diff --git a/Src/pattern.c b/Src/pattern.c index 03ba37d92..8de372c9e 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -2204,7 +2204,10 @@ pattrylen(Patprog prog, char *string, int len, int unmetalen, * the pattern module) at which we are trying to match. * This is added in to the positions recorded in patbeginp and patendp * when we are looking for substrings. Currently this only happens - * in the parameter substitution code. + * in the parameter substitution code. It refers to a real character + * offset, i.e. is already in the form ready for presentation to the + * general public --- this is necessary as we don't have the + * information to convert it down here. * * Note this is a character offset, i.e. a single possibly metafied and * possibly multibyte character counts as 1. 
@@ -2292,7 +2295,8 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin, */ if (!patstralloc->progstrunmeta) { - patstralloc->progstrunmeta = dupstring(progstr); + patstralloc->progstrunmeta = + dupstrpfx(progstr, (int)prog->patmlen); unmetafy(patstralloc->progstrunmeta, &patstralloc->progstrunmetalen); } @@ -2346,7 +2350,7 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin, * In the orignal structure, but it might be unmetafied * for use with an unmetafied test string. */ - patinlen = (int)prog->patmlen; + patinlen = pstrlen; /* if matching files, must update globbing flags */ patglobflags = prog->globend; @@ -2360,7 +2364,7 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin, * Unmetafied: pstrlen contains unmetafied * length in bytes. */ - str = metafy(patinstart, pstrlen, META_ALLOC); + str = metafy(patinstart, pstrlen, META_DUP); mlen = CHARSUB(patinstart, patinstart + pstrlen); } else { str = ztrduppfx(patinstart, patinlen); @@ -2454,8 +2458,8 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin, /* * Optimization: if we didn't find any Meta characters * to begin with, we don't need to look for them now. - * Only do this if we did the unmetfication internally, - * since otherwise it's too hard to work out. + * + * For patstralloc pased in, we want the unmetafied length. */ if (patstralloc == &patstralloc_struct && patstralloc->unmetalen != origlen) { @@ -2588,7 +2592,9 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin, /* * Return length of previous succesful match. This is - * in metafied bytes, i.e. includes a count of Meta characters. + * in metafied bytes, i.e. includes a count of Meta characters, + * unless the match was done on an unmetafied string using + * a patstralloc stuct, in which case it, too is unmetafed. * Unusual and futile attempt at modular encapsulation. 
*/ diff --git a/Src/zsh.h b/Src/zsh.h index 32f2e0cb2..15fa5e417 100644 --- a/Src/zsh.h +++ b/Src/zsh.h @@ -480,6 +480,7 @@ typedef struct heap *Heap; typedef struct heapstack *Heapstack; typedef struct histent *Histent; typedef struct hookdef *Hookdef; +typedef struct imatchdata *Imatchdata; typedef struct jobfile *Jobfile; typedef struct job *Job; typedef struct linkedmod *Linkedmod; @@ -1593,6 +1594,31 @@ typedef struct zpc_disables_save *Zpc_disables_save; /* Range: token followed by the (possibly multibyte) start and end */ #define PP_RANGE 21 +/* + * Argument to get_match_ret() in glob.c + */ +struct imatchdata { + /* Metafied trial string */ + char *mstr; + /* Its length */ + int mlen; + /* Unmetafied string */ + char *ustr; + /* Its length */ + int ulen; + /* Flags (SUB_*) */ + int flags; + /* Replacement string (metafied) */ + char *replstr; + /* + * List of bits of matches to concatenate with replacement string. + * The data is a struct repldata. It is not used in cases like + * ${...//#foo/bar} even though SUB_GLOBAL is set, since the match + * is anchored. It goes on the heap. + */ + LinkList repllist; +}; + /* Globbing flags: lower 8 bits gives approx count */ #define GF_LCMATCHUC 0x0100 #define GF_IGNCASE 0x0200 -- cgit v1.2.3 From a5a6c58ff883edc202bd0103af1e09848c9b8fe7 Mon Sep 17 00:00:00 2001 From: "Barton E. Schaefer" Date: Tue, 29 Sep 2015 13:36:43 -0700 Subject: unposted: fix typo in comment. --- Src/glob.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Src/glob.c') diff --git a/Src/glob.c b/Src/glob.c index 0594f0a82..d9986634a 100644 --- a/Src/glob.c +++ b/Src/glob.c @@ -2482,7 +2482,7 @@ get_match_ret(Imatchdata imd, int b, int e) if (imeta(*p)) add++; - /* Everything now refers to meatfied lengths. */ + /* Everything now refers to metafied lengths. 
*/ if (replstr || (fl & SUB_LIST)) { if (fl & SUB_DOSUBST) { replstr = dupstring(replstr); -- cgit v1.2.3 From 533658730745ae6261f9552aba01dc5346405d30 Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Wed, 30 Sep 2015 10:19:16 +0100 Subject: 36711: Allocate unmetafied pattern trial string on the heap --- ChangeLog | 5 +++++ Src/glob.c | 25 ------------------------- Src/pattern.c | 34 +++++++--------------------------- 3 files changed, 12 insertions(+), 52 deletions(-) (limited to 'Src/glob.c') diff --git a/ChangeLog b/ChangeLog index 38e0e4627..56e725cf0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2015-09-30 Peter Stephenson + + * 36711: Src/glob.c, Src/pattern.c: Memory for early unmetafied + pattern trial string is on the heap. + 2015-09-28 Peter Stephenson * 36682: Src/glob.c, Src/pattern.c, Src/zsh.h, diff --git a/Src/glob.c b/Src/glob.c index d9986634a..24e60d0c5 100644 --- a/Src/glob.c +++ b/Src/glob.c @@ -2478,9 +2478,6 @@ get_match_ret(Imatchdata imd, int b, int e) if (imeta(*p)) add++; e += add; - for (; p < imd->ustr + imd->ulen; p++) - if (imeta(*p)) - add++; /* Everything now refers to metafied lengths. */ if (replstr || (fl & SUB_LIST)) { @@ -2808,7 +2805,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, imd.replstr = NULL; } *sp = get_match_ret(&imd, 0, umltot); - patfreestr(&patstralloc); if (! 
**sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i))) return 0; return 1; @@ -2856,7 +2852,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, } } *sp = get_match_ret(&imd, 0, mlen); - patfreestr(&patstralloc); return 1; } break; @@ -2884,13 +2879,11 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, } if (tmatch) { *sp = get_match_ret(&imd, tmatch - s, umltot); - patfreestr(&patstralloc); return 1; } if (!(fl & SUB_START) && pattrylen(p, s + umltot, 0, 0, &patstralloc, ioff)) { *sp = get_match_ret(&imd, umltot, umltot); - patfreestr(&patstralloc); return 1; } break; @@ -2904,7 +2897,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, set_pat_start(p, t-s); if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) { *sp = get_match_ret(&imd, t-s, umltot); - patfreestr(&patstralloc); return 1; } if (fl & SUB_START) @@ -2914,7 +2906,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, if (!(fl & SUB_START) && pattrylen(p, send, 0, 0, &patstralloc, ioff)) { *sp = get_match_ret(&imd, umltot, umltot); - patfreestr(&patstralloc); return 1; } break; @@ -2926,7 +2917,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) { *sp = get_match_ret(&imd, 0, 0); - patfreestr(&patstralloc); return 1; } /* fall through */ case (SUB_SUBSTR|SUB_LONG): @@ -2984,7 +2974,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, umlen -= iincchar(&t, send - t); continue; } else { - patfreestr(&patstralloc); return 1; } } @@ -3011,7 +3000,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG && pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) { *sp = get_match_ret(&imd, 0, 0); - patfreestr(&patstralloc); return 1; } break; @@ -3024,7 +3012,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, if (pattrylen(p, send, 0, 0, &patstralloc, umltot) && !--n) { *sp = get_match_ret(&imd, umltot, 
umltot); - patfreestr(&patstralloc); return 1; } } @@ -3081,7 +3068,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, } } *sp = get_match_ret(&imd, tmatch-s, mpos-s); - patfreestr(&patstralloc); return 1; } set_pat_start(p, l); @@ -3089,7 +3075,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, &patstralloc, umltot) && !--n) { *sp = get_match_ret(&imd, umltot, umltot); - patfreestr(&patstralloc); return 1; } break; @@ -3134,11 +3119,9 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, start[lleft] = '\0'; *sp = (char *)start; } - patfreestr(&patstralloc); return 1; } if (fl & SUB_LIST) { /* safety: don't think this can happen */ - patfreestr(&patstralloc); return 0; } @@ -3146,7 +3129,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, imd.replstr = NULL; imd.repllist = NULL; *sp = get_match_ret(&imd, 0, 0); - patfreestr(&patstralloc); return (fl & SUB_RETFAIL) ? 0 : 1; } @@ -3244,7 +3226,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, } } *sp = get_match_ret(&imd, 0, mlen); - patfreestr(&patstralloc); return 1; } break; @@ -3357,7 +3338,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG && pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) { *sp = get_match_ret(&imd, 0, 0); - patfreestr(&patstralloc); return 1; } break; @@ -3369,7 +3349,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, set_pat_start(p, l); if (pattrylen(p, send, 0, 0, &patstralloc, uml) && !--n) { *sp = get_match_ret(&imd, uml, uml); - patfreestr(&patstralloc); return 1; } } @@ -3394,7 +3373,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, } } *sp = get_match_ret(&imd, t-s, mpos-s); - patfreestr(&patstralloc); return 1; } } @@ -3403,7 +3381,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, &patstralloc, uml) && !--n) { *sp = get_match_ret(&imd, uml, uml); - patfreestr(&patstralloc); return 1; } break; @@ 
-3445,7 +3422,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, memcpy(t, s + i, l - i); start[lleft] = '\0'; *sp = (char *)start; - patfreestr(&patstralloc); return 1; } @@ -3453,7 +3429,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr, imd.replstr = NULL; imd.repllist = NULL; *sp = get_match_ret(&imd, 0, 0); - patfreestr(&patstralloc); return 1; } diff --git a/Src/pattern.c b/Src/pattern.c index 8de372c9e..68a340919 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -2028,8 +2028,8 @@ pattrystart(void) * * Unmetafy a trial string for use in pattern matching, if needed. * - * If it is needed, returns a zalloc()'d string; if not needed, returns - * NULL. + * If it is needed, returns a heap allocated string; if not needed, + * returns NULL. * * prog is the pattern to be executed. * string is the metafied trial string. @@ -2046,7 +2046,7 @@ pattrystart(void) * unmetalenp is the umetafied length of a path segment preceeding * the trial string needed for file mananagement; it is calculated as * needed so does not need to be initialised. - * alloced is the memory allocated --- same as return value from + * alloced is the memory allocated on the heap --- same as return value from * function. */ /**/ @@ -2097,7 +2097,7 @@ char *patallocstr(Patprog prog, char *string, int stringlen, int unmetalen, int i, icopy, ncopy; dst = patstralloc->alloced = - zalloc(patstralloc->unmetalen + patstralloc->unmetalenp); + zhalloc(patstralloc->unmetalen + patstralloc->unmetalenp); if (needfullpath) { /* loop twice, copy path buffer first time */ @@ -2133,20 +2133,6 @@ char *patallocstr(Patprog prog, char *string, int stringlen, int unmetalen, } -/* - * Free memory allocated by patallocstr(). - */ - -/**/ -mod_export -void patfreestr(Patstralloc patstralloc) -{ - if (patstralloc->alloced) - zfree(patstralloc->alloced, - patstralloc->unmetalen + patstralloc->unmetalenp); -} - - /* * Test prog against null-terminated, metafied string. 
*/ @@ -2189,8 +2175,9 @@ pattrylen(Patprog prog, char *string, int len, int unmetalen, * done if there is no path prefix (pathpos == 0) as otherwise the path * buffer and unmetafied string may not match. To do this, * patallocstr() is callled (use force = 1 to ensure it is alway - * unmetafied); paststralloc points to existing storage. When all - * pattern matching is done, patfreestr() is called. + * unmetafied); paststralloc points to existing storage. Memory is + * on the heap. + * * patstralloc->alloced and patstralloc->unmetalen contain the * unmetafied string and its length. In that case, the rules for the * earlier arguments change: @@ -2387,8 +2374,6 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin, } } - if (patstralloc == &patstralloc_struct) - patfreestr(patstralloc); return ret; } else { int q = queue_signal_level(); @@ -2425,8 +2410,6 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin, } } if (!ret) { - if (patstralloc == &patstralloc_struct) - patfreestr(patstralloc); return 0; } @@ -2583,9 +2566,6 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin, restore_queue_signals(q); - if (patstralloc == &patstralloc_struct) - patfreestr(patstralloc); - return ret; } } -- cgit v1.2.3 From 58f4cccb1fbd66b7645178af971cb317cf1a2d7a Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Fri, 30 Oct 2015 12:28:07 +0000 Subject: 37022: add GLOB_STAR_SHORT option to abbreviate ** and *** --- ChangeLog | 6 ++++++ Doc/Zsh/expn.yo | 12 ++++++++++++ Doc/Zsh/options.yo | 15 +++++++++++++++ Src/glob.c | 43 +++++++++++++++++++++++++------------------ Src/options.c | 1 + Src/zsh.h | 1 + 6 files changed, 60 insertions(+), 18 deletions(-) (limited to 'Src/glob.c') diff --git a/ChangeLog b/ChangeLog index ba9113487..0ef6fe6ee 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2015-10-30 Peter Stephenson + + * 37022: Doc/Zsh/expn.yo, Doc/Zsh/options.yo, Src/glob.c, + Src/options.c, Src/zsh.h: add 
GLOB_STAR_SHORT option to + allow shorthand ** for **/* and *** for ***/*. + 2015-10-29 Peter Stephenson * 37018: Src/math.c, Src/params.c, Test/E01options.ztst: make diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo index 5ea8610f2..20e0c8d35 100644 --- a/Doc/Zsh/expn.yo +++ b/Doc/Zsh/expn.yo @@ -2381,6 +2381,18 @@ follow symbolic links; the alternative form `tt(***/)' does, but is otherwise identical. Neither of these can be combined with other forms of globbing within the same path segment; in that case, the `tt(*)' operators revert to their usual effect. + +Even shorter forms are available when the option tt(GLOB_STAR_SHORT) is +set. In that case if no tt(/) immediately follows a tt(**) or tt(***) +they are treated as if both a tt(/) plus a further tt(*) are present. +Hence: + +example(setopt GLOBSTARSHORT +ls **.c) + +is equivalent to + +example(ls **/*.c) subsect(Glob Qualifiers) cindex(globbing, qualifiers) cindex(qualifiers, globbing) diff --git a/Doc/Zsh/options.yo b/Doc/Zsh/options.yo index fbf65abbc..60379cabc 100644 --- a/Doc/Zsh/options.yo +++ b/Doc/Zsh/options.yo @@ -534,6 +534,21 @@ cindex(globbing, of . files) item(tt(GLOB_DOTS) (tt(-4)))( Do not require a leading `tt(.)' in a filename to be matched explicitly. ) +pindex(GLOB_STAR_SHORT) +pindex(NO_GLOB_STAR_SHORT) +pindex(GLOBSTARSHORT) +pindex(NOGLOBSTARSHORT) +cindex(globbing, short forms) +cindex(globbing, ** special) +item(tt(GLOB_STAR_SHORT))( +When this option is set and the default zsh-style globbing is in +effect, the pattern `tt(**/*)' can be abbreviated to `tt(**)' and the +pattern `tt(***/*)' can be abbreviated to tt(***). Hence `tt(**.c)' +finds a file ending in tt(.c) in any subdirectory, and `tt(***.c)' does +the same while also following symbolic links. A tt(/) immediately +after the `tt(**)' or `tt(***)' forces the pattern to be treated as the +unabbreviated form. 
+) pindex(GLOB_SUBST) pindex(NO_GLOB_SUBST) pindex(GLOBSUBST) diff --git a/Src/glob.c b/Src/glob.c index 24e60d0c5..51ffeb5d5 100644 --- a/Src/glob.c +++ b/Src/glob.c @@ -682,25 +682,32 @@ parsecomplist(char *instr) char *str; int compflags = gf_noglobdots ? (PAT_FILE|PAT_NOGLD) : PAT_FILE; - if (instr[0] == Star && instr[1] == Star && - (instr[2] == '/' || (instr[2] == Star && instr[3] == '/'))) { - /* Match any number of directories. */ - int follow; - - /* with three stars, follow symbolic links */ - follow = (instr[2] == Star); - instr += (3 + follow); - - /* Now get the next path component if there is one. */ - l1 = (Complist) zhalloc(sizeof *l1); - if ((l1->next = parsecomplist(instr)) == NULL) { - errflag |= ERRFLAG_ERROR; - return NULL; + if (instr[0] == Star && instr[1] == Star) { + int shortglob = 0; + if (instr[2] == '/' || (instr[2] == Star && instr[3] == '/') + || (shortglob = isset(GLOBSTARSHORT))) { + /* Match any number of directories. */ + int follow; + + /* with three stars, follow symbolic links */ + follow = (instr[2] == Star); + /* + * With GLOBSTARSHORT, leave a star in place for the + * pattern inside the directory. + */ + instr += ((shortglob ? 1 : 3) + follow); + + /* Now get the next path component if there is one. */ + l1 = (Complist) zhalloc(sizeof *l1); + if ((l1->next = parsecomplist(instr)) == NULL) { + errflag |= ERRFLAG_ERROR; + return NULL; + } + l1->pat = patcompile(NULL, compflags | PAT_ANY, NULL); + l1->closure = 1; /* ...zero or more times. */ + l1->follow = follow; + return l1; } - l1->pat = patcompile(NULL, compflags | PAT_ANY, NULL); - l1->closure = 1; /* ...zero or more times. 
*/ - l1->follow = follow; - return l1; } /* Parse repeated directories such as (dir/)# and (dir/)## */ diff --git a/Src/options.c b/Src/options.c index 1fb102f1d..3bf9f39a4 100644 --- a/Src/options.c +++ b/Src/options.c @@ -140,6 +140,7 @@ static struct optname optns[] = { {{NULL, "globassign", OPT_EMULATE|OPT_CSH}, GLOBASSIGN}, {{NULL, "globcomplete", 0}, GLOBCOMPLETE}, {{NULL, "globdots", OPT_EMULATE}, GLOBDOTS}, +{{NULL, "globstarshort", OPT_EMULATE}, GLOBSTARSHORT}, {{NULL, "globsubst", OPT_EMULATE|OPT_NONZSH}, GLOBSUBST}, {{NULL, "hashcmds", OPT_ALL}, HASHCMDS}, {{NULL, "hashdirs", OPT_ALL}, HASHDIRS}, diff --git a/Src/zsh.h b/Src/zsh.h index d03d171e4..a6f039741 100644 --- a/Src/zsh.h +++ b/Src/zsh.h @@ -2215,6 +2215,7 @@ enum { GLOBASSIGN, GLOBCOMPLETE, GLOBDOTS, + GLOBSTARSHORT, GLOBSUBST, HASHCMDS, HASHDIRS, -- cgit v1.2.3 From 830d54e629e8e12eb5a219a65a013876662e7b3e Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Wed, 11 Nov 2015 18:04:20 +0000 Subject: 37092: make nested ${(P)name} properly refer to parameter on return --- ChangeLog | 5 +++ Doc/Zsh/expn.yo | 11 ++++- Src/Zle/compctl.c | 4 +- Src/Zle/zle_tricky.c | 2 +- Src/cond.c | 2 +- Src/exec.c | 16 ++++---- Src/glob.c | 2 +- Src/subst.c | 114 +++++++++++++++++++++++++++++++-------------------- Src/zsh.h | 51 +++++++++++++++++------ 9 files changed, 138 insertions(+), 69 deletions(-) (limited to 'Src/glob.c') diff --git a/ChangeLog b/ChangeLog index 5f6c59c0a..e9bef7d68 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2015-11-11 Peter Stephenson + * 37092: Doc/Zsh/expn.yo, Src/Zle/compctl.c, + Src/Zle/zle_tricky.c, Src/cond.c, Src/exec.c, Src/glob.c, + Src/subst.c, Src/zsh.h: make a ${(P)name} subexpression properly + refer to a parameter name. + * 37091: Src/Zle/zle_utils.c: clear lastline and lastlinesz when freeing. 
diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo index 20e0c8d35..4c373d1f2 100644 --- a/Doc/Zsh/expn.yo +++ b/Doc/Zsh/expn.yo @@ -1033,7 +1033,16 @@ var(name) used in this fashion. If used with a nested parameter or command substitution, the result of that will be taken as a parameter name in the same way. For example, if you have `tt(foo=bar)' and `tt(bar=baz)', the strings tt(${(P)foo}), -tt(${(P)${foo}}), and tt(${(P)$(echo bar)}) will be expanded to `tt(baz)'. +tt(${(P)${foo}}), and tt(${(P)$(echo bar)}) will be expanded to +`tt(baz)'. + +Likewise, if the reference is itself nested, the expression with the +flag is treated as if it were directly replaced by the parameter name. +It is an error if this nested substitution produces an array with more +than one word. For example, if `tt(name=assoc)' where the parameter +tt(assoc) is an associative array, then +`tt(${${(P)name}[elt]})' refers to the element of the associative +array subscripted `tt(elt)'. ) item(tt(q))( Quote characters that are special to the shell in the resulting words with diff --git a/Src/Zle/compctl.c b/Src/Zle/compctl.c index bac533e7e..8381867d0 100644 --- a/Src/Zle/compctl.c +++ b/Src/Zle/compctl.c @@ -2116,7 +2116,7 @@ getreal(char *str) noerrs = 1; addlinknode(l, dupstring(str)); - prefork(l, 0); + prefork(l, 0, NULL); noerrs = ne; if (!errflag && nonempty(l) && ((char *) peekfirst(l)) && ((char *) peekfirst(l))[0]) @@ -3728,7 +3728,7 @@ makecomplistflags(Compctl cc, char *s, int incmd, int compadd) errflag &= ~ERRFLAG_ERROR; zcontext_restore(); /* Fine, now do full expansion.
*/ - prefork(foo, 0); + prefork(foo, 0, NULL); if (!errflag) { globlist(foo, 0); if (!errflag) diff --git a/Src/Zle/zle_tricky.c b/Src/Zle/zle_tricky.c index e26f66379..4e6854928 100644 --- a/Src/Zle/zle_tricky.c +++ b/Src/Zle/zle_tricky.c @@ -2223,7 +2223,7 @@ doexpansion(char *s, int lst, int olst, int explincmd) else if (*ts == '\'') *ts = Snull; addlinknode(vl, ss); - prefork(vl, 0); + prefork(vl, 0, NULL); if (errflag) goto end; if (lst == COMP_LIST_EXPAND || lst == COMP_EXPAND) { diff --git a/Src/cond.c b/Src/cond.c index df9065660..c5ab65eea 100644 --- a/Src/cond.c +++ b/Src/cond.c @@ -43,7 +43,7 @@ static void cond_subst(char **strp, int glob_ok) checkglobqual(*strp, strlen(*strp), 1, NULL)) { LinkList args = newlinklist(); addlinknode(args, *strp); - prefork(args, 0); + prefork(args, 0, NULL); while (!errflag && args && nonempty(args) && has_token((char *)peekfirst(args))) zglob(args, firstnode(args), 0); diff --git a/Src/exec.c b/Src/exec.c index f0d1d2f70..c0ee527b7 100644 --- a/Src/exec.c +++ b/Src/exec.c @@ -2290,7 +2290,7 @@ addvars(Estate state, Wordcode pc, int addflags) if (vl && htok) { prefork(vl, (isstr ? (PREFORK_SINGLE|PREFORK_ASSIGN) : - PREFORK_ASSIGN)); + PREFORK_ASSIGN), NULL); if (errflag) { state->pc = opc; return; @@ -2416,7 +2416,7 @@ void execsubst(LinkList strs) { if (strs) { - prefork(strs, esprefork); + prefork(strs, esprefork, NULL); if (esglob && !errflag) { LinkList ostrs = strs; globlist(strs, 0); @@ -2721,7 +2721,7 @@ execcmd(Estate state, int input, int output, int how, int last1) /* Do prefork substitutions */ esprefork = (assign || isset(MAGICEQUALSUBST)) ? 
PREFORK_TYPESET : 0; if (args && htok) - prefork(args, esprefork); + prefork(args, esprefork, NULL); if (type == WC_SIMPLE || type == WC_TYPESET) { int unglobbed = 0; @@ -3558,7 +3558,7 @@ execcmd(Estate state, int input, int output, int how, int last1) */ /* Unused dummy value for name */ (void)ecgetstr(state, EC_DUPTOK, &htok); - prefork(&svl, PREFORK_TYPESET); + prefork(&svl, PREFORK_TYPESET, NULL); if (errflag) { state->pc = opc; break; @@ -3584,7 +3584,7 @@ execcmd(Estate state, int input, int output, int how, int last1) } continue; } - prefork(&svl, PREFORK_SINGLE); + prefork(&svl, PREFORK_SINGLE, NULL); name = empty(&svl) ? "" : (char *)getdata(firstnode(&svl)); } @@ -3600,7 +3600,9 @@ execcmd(Estate state, int input, int output, int how, int last1) } else { if (htok) { init_list1(svl, val); - prefork(&svl, PREFORK_SINGLE|PREFORK_ASSIGN); + prefork(&svl, + PREFORK_SINGLE|PREFORK_ASSIGN, + NULL); if (errflag) { state->pc = opc; break; @@ -3622,7 +3624,7 @@ execcmd(Estate state, int input, int output, int how, int last1) EC_DUPTOK, &htok); if (asg->value.array) { - prefork(asg->value.array, PREFORK_ASSIGN); + prefork(asg->value.array, PREFORK_ASSIGN, NULL); if (errflag) { state->pc = opc; break; diff --git a/Src/glob.c b/Src/glob.c index 51ffeb5d5..94b3f620d 100644 --- a/Src/glob.c +++ b/Src/glob.c @@ -2093,7 +2093,7 @@ xpandredir(struct redir *fn, LinkList redirtab) /* Stick the name in a list... */ init_list1(fake, fn->name); /* ...which undergoes all the usual shell expansions */ - prefork(&fake, isset(MULTIOS) ? 0 : PREFORK_SINGLE); + prefork(&fake, isset(MULTIOS) ? 0 : PREFORK_SINGLE, NULL); /* Globbing is only done for multios. 
*/ if (!errflag && isset(MULTIOS)) globlist(&fake, 0); diff --git a/Src/subst.c b/Src/subst.c index febdc9bea..f3a4ad44d 100644 --- a/Src/subst.c +++ b/Src/subst.c @@ -44,15 +44,23 @@ char nulstring[] = {Nularg, '\0'}; * - Brace expansion * - Tilde and equals substitution * - * PREFORK_* flags are defined in zsh.h + * "flags" contains PREFORK_* flags, defined in zsh.h. + * + * "ret_flags" is used to return values from nested parameter + * substitutions. It may be NULL in which case PREFORK_SUBEXP + * must not appear in flags; any return value from below + * will be discarded. */ /**/ mod_export void -prefork(LinkList list, int flags) +prefork(LinkList list, int flags, int *ret_flags) { LinkNode node, stop = 0; int keep = 0, asssub = (flags & PREFORK_TYPESET) && isset(KSHTYPESET); + int ret_flags_local = 0; + if (!ret_flags) + ret_flags = &ret_flags_local; /* will be discarded */ queue_signals(); for (node = firstnode(list); node; incnode(node)) { @@ -75,10 +83,8 @@ prefork(LinkList list, int flags) setdata(node, cptr); } if (!(node = stringsubst(list, node, - flags & (PREFORK_SINGLE|PREFORK_SPLIT| - PREFORK_SHWORDSPLIT| - PREFORK_NOSHWORDSPLIT), - asssub))) { + flags & ~(PREFORK_TYPESET|PREFORK_ASSIGN), + ret_flags, asssub))) { unqueue_signals(); return; } @@ -149,7 +155,8 @@ stringsubstquote(char *strstart, char **pstrdpos) /**/ static LinkNode -stringsubst(LinkList list, LinkNode node, int pf_flags, int asssub) +stringsubst(LinkList list, LinkNode node, int pf_flags, int *ret_flags, + int asssub) { int qt; char *str3 = (char *)getdata(node); @@ -235,7 +242,8 @@ stringsubst(LinkList list, LinkNode node, int pf_flags, int asssub) pf_flags |= PREFORK_SHWORDSPLIT; node = paramsubst( list, node, &str, qt, - pf_flags & (PREFORK_SINGLE|PREFORK_SHWORDSPLIT)); + pf_flags & (PREFORK_SINGLE|PREFORK_SHWORDSPLIT| + PREFORK_SUBEXP), ret_flags); if (errflag || !node) return NULL; str3 = (char *)getdata(node); @@ -413,29 +421,13 @@ singsub(char **s) init_list1(foo, *s); -
prefork(&foo, PREFORK_SINGLE); + prefork(&foo, PREFORK_SINGLE, NULL); if (errflag) return; *s = (char *) ugetnode(&foo); DPUTS(nonempty(&foo), "BUG: singsub() produced more than one word!"); } -/* - * Bit flags passed back from multsub() to paramsubst(). - */ -enum { - /* - * Set if the string had whitespace at the start - * that should cause word splitting against any preceeding string. - */ - WS_AT_START = 1, - /* - * Set if the string had whitespace at the end - * that should cause word splitting against any following string. - */ - WS_AT_END = 2 -}; - /* Perform substitution on a single word, *s. Unlike with singsub(), the * result can be more than one word. If split is non-zero, the string is * first word-split using IFS, but only for non-quoted "whitespace" (as @@ -448,13 +440,13 @@ enum { * NULL to use IFS). The return value is true iff the expansion resulted * in an empty list. * - * *ws_at_start is set to bits in the enum above as neeed. + * *ms_flags is set to bits in the MULTSUB_* enum (see zsh.h) as needed.
*/ /**/ static int multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep, - int *ws_sub) + int *ms_flags) { int l; char **r, **p, *x = *s; @@ -470,7 +462,7 @@ multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep, l++; if (!iwsep(STOUC(c))) break; - *ws_sub |= WS_AT_START; + *ms_flags |= MULTSUB_WS_AT_START; } } @@ -503,7 +495,7 @@ multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep, break; } if (!*x) { - *ws_sub |= WS_AT_END; + *ms_flags |= MULTSUB_WS_AT_END; break; } insertlinknode(&foo, n, (void *)x), incnode(n); @@ -532,7 +524,7 @@ multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep, } } - prefork(&foo, pf_flags); + prefork(&foo, pf_flags, ms_flags); if (errflag) { if (isarr) *isarr = 0; @@ -1517,7 +1509,8 @@ check_colon_subscript(char *str, char **endp) /**/ static LinkNode -paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags) +paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, + int *ret_flags) { char *aptr = *str, c, cc; char *s = aptr, *fstr, *idbeg, *idend, *ostr = (char *) getdata(n); @@ -1747,7 +1740,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags) * whitespace. However, if there's no "x" the whitespace is * simply removed. */ - int ws_sub = 0; + int ms_flags = 0; *s++ = '\0'; /* @@ -2296,8 +2289,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags) * remove the aspar test and extract a value from an array, if * necessary, when we handle (P) lower down. */ - if (multsub(&val, 0, (aspar ? NULL : &aval), &isarr, NULL, - &ws_sub) && quoted) { + if (multsub(&val, PREFORK_SUBEXP, (aspar ? 
NULL : &aval), &isarr, NULL, + &ms_flags) && quoted) { /* Empty quoted string --- treat as null string, not elided */ isarr = -1; aval = (char **) hcalloc(sizeof(char *)); @@ -2311,6 +2304,28 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags) */ while (inull(*s)) s++; + if (ms_flags & MULTSUB_PARAM_NAME) { + /* + * Downbelow has told us this is a parameter name, e.g. + * ${${(P)name}...}. We're going to behave as if + * we have exactly that name followed by the rest of + * the parameter for subscripting etc. + * + * See below for where we set the flag in the nested + * substitution. + */ + if (isarr) { + if (aval[1]) { + zerr("parameter name reference used with array"); + return NULL; + } + val = aval[0]; + isarr = 0; + } + s = dyncat(val, s); + /* Now behave po-faced as if it was always like that... */ + subexp = aspar = 0; + } v = (Value) NULL; } else if (aspar) { /* @@ -2328,13 +2343,24 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags) } else vunset = 1; } + if (aspar && (pf_flags & PREFORK_SUBEXP)) { + /* + * This is the inner handling for the case referred to above + * where we have something like ${${(P)name}...}. + * + * Treat this as a normal value here; all transformations on + * result are in outer instance. + */ + aspar = 0; + *ret_flags |= MULTSUB_PARAM_NAME; + } /* * We need to retrieve a value either if we haven't already * got it from a subexpression, or if the processing so * far has just yielded us a parameter name to be processed * with (P). */ - if (!subexp || aspar) { + else if (!subexp || aspar) { char *ov = val; /* @@ -2768,7 +2794,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags) split_flags = PREFORK_NOSHWORDSPLIT; } multsub(&val, split_flags, (aspar ?
NULL : &aval), - &isarr, NULL, &ws_sub); + &isarr, NULL, &ms_flags); copied = 1; spbreak = 0; /* Leave globsubst on if forced */ @@ -2797,14 +2823,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags) * behavior on caller choice of PREFORK_SHWORDSPLIT. */ multsub(&val, spbreak ? PREFORK_SINGLE : PREFORK_NOSHWORDSPLIT, - NULL, &isarr, NULL, &ws_sub); + NULL, &isarr, NULL, &ms_flags); } else { if (spbreak) split_flags = PREFORK_SPLIT|PREFORK_SHWORDSPLIT; else split_flags = PREFORK_NOSHWORDSPLIT; multsub(&val, split_flags, &aval, &isarr, NULL, - &ws_sub); + &ms_flags); spbreak = 0; } if (arrasg) { @@ -3336,7 +3362,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags) } if (haserr || errflag) return NULL; - ws_sub = 0; + ms_flags = 0; } /* * This handles taking a length with ${#foo} and variations. @@ -3375,7 +3401,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags) sprintf(buf, "%ld", len); val = dupstring(buf); isarr = 0; - ws_sub = 0; + ms_flags = 0; } /* At this point we make sure that our arrayness has affected the * arrayness of the linked list. Then, we can turn our value into @@ -3405,7 +3431,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags) if (isarr) { val = sepjoin(aval, sep, 1); isarr = 0; - ws_sub = 0; + ms_flags = 0; } if (!ssub && (spbreak || spsep)) { aval = sepsplit(val, spsep, 0, 1); @@ -3690,12 +3716,12 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags) * If a multsub result had whitespace at the start and we're * splitting and there's a previous string, now's the time to do so. 
*/ - if ((ws_sub & WS_AT_START) && aptr > ostr) { + if ((ms_flags & MULTSUB_WS_AT_START) && aptr > ostr) { insertlinknode(l, n, dupstrpfx(ostr, aptr - ostr)), incnode(n); ostr = aptr; } /* Likewise at the end */ - if ((ws_sub & WS_AT_END) && *fstr) { + if ((ms_flags & MULTSUB_WS_AT_END) && *fstr) { insertlinknode(l, n, dupstring(fstr)); /* appended, no incnode */ *fstr = '\0'; } @@ -3777,7 +3803,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags) *--fstr = Marker; init_list1(tl, fstr); - if (!eval && !stringsubst(&tl, firstnode(&tl), ssub, 0)) + if (!eval && !stringsubst(&tl, firstnode(&tl), ssub, ret_flags, 0)) return NULL; *str = aptr; tn = firstnode(&tl); diff --git a/Src/zsh.h b/Src/zsh.h index a6f039741..d3bfcefcc 100644 --- a/Src/zsh.h +++ b/Src/zsh.h @@ -1866,18 +1866,45 @@ enum { }; /* Flags as the second argument to prefork */ -/* argument handled like typeset foo=bar */ -#define PREFORK_TYPESET 0x01 -/* argument handled like the RHS of foo=bar */ -#define PREFORK_ASSIGN 0x02 -/* single word substitution */ -#define PREFORK_SINGLE 0x04 -/* explicitly split nested substitution */ -#define PREFORK_SPLIT 0x08 -/* SHWORDSPLIT in parameter expn */ -#define PREFORK_SHWORDSPLIT 0x10 -/* SHWORDSPLIT forced off in nested subst */ -#define PREFORK_NOSHWORDSPLIT 0x20 +enum { + /* argument handled like typeset foo=bar */ + PREFORK_TYPESET = 0x01, + /* argument handled like the RHS of foo=bar */ + PREFORK_ASSIGN = 0x02, + /* single word substitution */ + PREFORK_SINGLE = 0x04, + /* explicitly split nested substitution */ + PREFORK_SPLIT = 0x08, + /* SHWORDSPLIT in parameter expn */ + PREFORK_SHWORDSPLIT = 0x10, + /* SHWORDSPLIT forced off in nested subst */ + PREFORK_NOSHWORDSPLIT = 0x20, + /* Prefork is part of a parameter subexpression */ + PREFORK_SUBEXP = 0x40 +}; + +/* + * Bit flags passed back from multsub() to paramsubst(). + * Some flags go from a nested paramsubst() through the enclosing + * stringsubst() and prefork().
+ */ +enum { + /* + * Set if the string had whitespace at the start + * that should cause word splitting against any preceding string. + */ + MULTSUB_WS_AT_START = 1, + /* + * Set if the string had whitespace at the end + * that should cause word splitting against any following string. + */ + MULTSUB_WS_AT_END = 2, + /* + * Set by nested paramsubst() to indicate the return + * value is a parameter name, rather than a value. + */ + MULTSUB_PARAM_NAME = 4 +}; /* * Structure for adding parameters in a module. -- cgit v1.2.3