diff options
author | Axel Beckert <abe@deuxchevaux.org> | 2015-11-25 18:51:00 +0100 |
---|---|---|
committer | Axel Beckert <abe@deuxchevaux.org> | 2015-11-25 18:51:00 +0100 |
commit | 317ec32cb1cbd15b31e17bcb07f09c52cd37c44a (patch) | |
tree | 88a02c853dfafd82a2d551d862d8dfb056b1bee6 /Src/pattern.c | |
parent | 1637291aaea12ddcfd549d50d49c480185995c1a (diff) | |
parent | cce4261a3c6f4bf78b483db61623c80e3c98d10b (diff) | |
download | zsh-317ec32cb1cbd15b31e17bcb07f09c52cd37c44a.tar.gz zsh-317ec32cb1cbd15b31e17bcb07f09c52cd37c44a.zip |
Merge tag 'zsh-5.1.1-test-1' into debian
Diffstat (limited to 'Src/pattern.c')
-rw-r--r-- | Src/pattern.c | 388 |
1 files changed, 276 insertions, 112 deletions
diff --git a/Src/pattern.c b/Src/pattern.c index 3b55ccf1c..9e8a80ae1 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -220,8 +220,10 @@ typedef union upat *Upat; #if defined(ZSH_64_BIT_TYPE) || defined(LONG_IS_64_BIT) typedef zlong zrange_t; #define ZRANGE_T_IS_SIGNED (1) +#define ZRANGE_MAX ZLONG_MAX #else typedef unsigned long zrange_t; +#define ZRANGE_MAX ULONG_MAX #endif #ifdef MULTIBYTE_SUPPORT @@ -2021,124 +2023,127 @@ pattrystart(void) } /* - * Test prog against null-terminated, metafied string. + * Fix up string length stuff. + * + * If we call patallocstr() with "force" to set things up early, it's + * done there, else it's done in pattryrefs(). The reason for the + * difference is in the latter case we may not be relying on + * patallocstr() having an effect. */ /**/ -mod_export int -pattry(Patprog prog, char *string) +static void +patmungestring(char **string, int *stringlen, int *unmetalenin) { - return pattryrefs(prog, string, -1, -1, 0, NULL, NULL, NULL); -} - -/* - * Test prog against string of given length, no null termination - * but still metafied at this point. offset gives an offset - * to include in reported match indices - */ + /* + * Special signalling of empty tokenised string. + */ + if (*stringlen > 0 && **string == Nularg) { + (*string)++; + /* + * If we don't have an unmetafied length + * and need it (we may not) we'll get it later. + */ + if (*unmetalenin > 0) + (*unmetalenin)--; + if (*stringlen > 0) + (*stringlen)--; + } -/**/ -mod_export int -pattrylen(Patprog prog, char *string, int len, int unmetalen, int offset) -{ - return pattryrefs(prog, string, len, unmetalen, offset, NULL, NULL, NULL); + /* Ensure we have a metafied length */ + if (*stringlen < 0) + *stringlen = strlen(*string); } /* - * Test prog against string with given lengths. The input - * string is metafied; stringlen is the raw string length, and - * unmetalen the number of characters in the original string (some - * of which may now be metafied). Either value may be -1 - * to indicate a null-terminated string which will be counted. Note - * there may be a severe penalty for this if a lot of matching is done - * on one string. + * Allocate memeory for pattern match. Note this is specific to use + * of pattern *and* trial string. * - * offset is the position in the original string (not seen by - * the pattern module) at which we are trying to match. - * This is added in to the positions recorded in patbeginp and patendp - * when we are looking for substrings. Currently this only happens - * in the parameter substitution code. + * Unmetafy a trial string for use in pattern matching, if needed. * - * Note this is a character offset, i.e. a metafied character - * counts as 1. + * If it is needed, returns a heap allocated string; if not needed, + * returns NULL. * - * The last three arguments are used to report the positions for the - * backreferences. On entry, *nump should contain the maximum number - * of positions to report. In this case the match, mbegin, mend - * arrays are not altered. - * - * If nump is NULL but endp is not NULL, then *endp is set to the - * end position of the match, taking into account patinstart. + * prog is the pattern to be executed. + * string is the metafied trial string. + * stringlen is it's length; it will be calculated if it's negative + * (this is a simple strlen()). + * unmetalen is the unmetafied length of the string, may be -1. + * force is 1 if we always unmetafy: this is useful if we are going + * to try again with different versions of the string. If this is + * called from pattryrefs() we don't force unmetafication as it won't + * be optimal. This option should be used if the resulting + * patstralloc is going to be passed to pattrylen() / pattryrefs(). + * In patstralloc (supplied by caller, must last until last pattry is done) + * unmetalen is the unmetafied length of the string; it will be + * calculated if the input value is negative. + * unmetalenp is the umetafied length of a path segment preceeding + * the trial string needed for file mananagement; it is calculated as + * needed so does not need to be initialised. + * alloced is the memory allocated on the heap --- same as return value from + * function. */ - /**/ -mod_export int -pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, - int patoffset, - int *nump, int *begp, int *endp) +mod_export +char *patallocstr(Patprog prog, char *string, int stringlen, int unmetalen, + int force, Patstralloc patstralloc) { - int i, maxnpos = 0, ret, needfullpath, unmetalenp; - int origlen; - char **sp, **ep, *tryalloced, *ptr; - char *progstr = (char *)prog + prog->startoff; + int needfullpath; - if (nump) { - maxnpos = *nump; - *nump = 0; - } - /* inherited from domatch, but why, exactly? */ - if (*string == Nularg) { - string++; - unmetalen--; - } + if (force) + patmungestring(&string, &stringlen, &unmetalen); - if (stringlen < 0) - stringlen = strlen(string); - origlen = stringlen; - - patflags = prog->flags; /* * For a top-level ~-exclusion, we will need the full * path to exclude, so copy the path so far and append the * current test string. */ - needfullpath = (patflags & PAT_HAS_EXCLUDP) && pathpos; + needfullpath = (prog->flags & PAT_HAS_EXCLUDP) && pathpos; /* Get the length of the full string when unmetafied. */ if (unmetalen < 0) - unmetalen = ztrsub(string + stringlen, string); - if (needfullpath) - unmetalenp = ztrsub(pathbuf + pathpos, pathbuf); + patstralloc->unmetalen = ztrsub(string + stringlen, string); else - unmetalenp = 0; + patstralloc->unmetalen = unmetalen; + if (needfullpath) { + patstralloc->unmetalenp = ztrsub(pathbuf + pathpos, pathbuf); + if (!patstralloc->unmetalenp) + needfullpath = 0; + } else + patstralloc->unmetalenp = 0; + /* Initialise cache area */ + patstralloc->progstrunmeta = NULL; + patstralloc->progstrunmetalen = 0; - DPUTS(needfullpath && (patflags & (PAT_PURES|PAT_ANY)), + DPUTS(needfullpath && (prog->flags & (PAT_PURES|PAT_ANY)), "rum sort of file exclusion"); /* * Partly for efficiency, and partly for the convenience of * globbing, we don't unmetafy pure string patterns, and * there's no reason to if the pattern is just a *. */ - if (!(patflags & (PAT_PURES|PAT_ANY)) - && (needfullpath || unmetalen != stringlen)) { + if (force || + (!(prog->flags & (PAT_PURES|PAT_ANY)) + && (needfullpath || patstralloc->unmetalen != stringlen))) { /* * We need to copy if we need to prepend the path so far * (in which case we copy both chunks), or if we have * Meta characters. */ - char *dst; - int icopy, ncopy; + char *dst, *ptr; + int i, icopy, ncopy; - dst = tryalloced = zalloc(unmetalen + unmetalenp); + dst = patstralloc->alloced = + zhalloc(patstralloc->unmetalen + patstralloc->unmetalenp); if (needfullpath) { /* loop twice, copy path buffer first time */ ptr = pathbuf; - ncopy = unmetalenp; + ncopy = patstralloc->unmetalenp; } else { /* just loop once, copy string with unmetafication */ ptr = string; - ncopy = unmetalen; + ncopy = patstralloc->unmetalen; } for (icopy = 0; icopy < 2; icopy++) { for (i = 0; i < ncopy; i++) { @@ -2153,22 +2158,136 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, break; /* next time append test string to path so far */ ptr = string; - ncopy = unmetalen; + ncopy = patstralloc->unmetalen; } + } + else + { + patstralloc->alloced = NULL; + } - if (needfullpath) { - patinstart = tryalloced + unmetalenp; - patinpath = tryalloced; - } else { - patinstart = tryalloced; - patinpath = NULL; - } - stringlen = unmetalen; - } else { + return patstralloc->alloced; +} + + +/* + * Test prog against null-terminated, metafied string. + */ + +/**/ +mod_export int +pattry(Patprog prog, char *string) +{ + return pattryrefs(prog, string, -1, -1, NULL, 0, NULL, NULL, NULL); +} + +/* + * Test prog against string of given length, no null termination + * but still metafied at this point. offset gives an offset + * to include in reported match indices + */ + +/**/ +mod_export int +pattrylen(Patprog prog, char *string, int len, int unmetalen, + Patstralloc patstralloc, int offset) +{ + return pattryrefs(prog, string, len, unmetalen, patstralloc, offset, + NULL, NULL, NULL); +} + +/* + * Test prog against string with given lengths. The input + * string is metafied; stringlen is the raw string length, and + * unmetalen the number of characters in the original string (some + * of which may now be metafied). Either value may be -1 + * to indicate a null-terminated string which will be counted. Note + * there may be a severe penalty for this if a lot of matching is done + * on one string. + * + * If patstralloc is not NULL it is used to optimise unmetafication + * of a trial string that may be passed (or any substring may be passed) to + * pattryrefs multiple times or the same pattern (N.B. so patstralloc + * depends on both prog *and* the trial string). This should only be + * done if there is no path prefix (pathpos == 0) as otherwise the path + * buffer and unmetafied string may not match. To do this, + * patallocstr() is callled (use force = 1 to ensure it is alway + * unmetafied); paststralloc points to existing storage. Memory is + * on the heap. + * + * patstralloc->alloced and patstralloc->unmetalen contain the + * unmetafied string and its length. In that case, the rules for the + * earlier arguments change: + * - string is an unmetafied string + * - stringlen is its unmetafied (i.e. actual) length + * - unmetalenin is not used. + * string and stringlen may refer to arbitrary substrings of + * patstralloc->alloced without any internal modification to patstralloc. + * + * patoffset is the position in the original string (not seen by + * the pattern module) at which we are trying to match. + * This is added in to the positions recorded in patbeginp and patendp + * when we are looking for substrings. Currently this only happens + * in the parameter substitution code. It refers to a real character + * offset, i.e. is already in the form ready for presentation to the + * general public --- this is necessary as we don't have the + * information to convert it down here. + * + * Note this is a character offset, i.e. a single possibly metafied and + * possibly multibyte character counts as 1. + * + * The last three arguments are used to report the positions for the + * backreferences. On entry, *nump should contain the maximum number + * of positions to report. In this case the match, mbegin, mend + * arrays are not altered. + * + * If nump is NULL but endp is not NULL, then *endp is set to the + * end position of the match, taking into account patinstart. + */ + +/**/ +mod_export int +pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin, + Patstralloc patstralloc, int patoffset, + int *nump, int *begp, int *endp) +{ + int i, maxnpos = 0, ret; + int origlen; + char **sp, **ep, *ptr; + char *progstr = (char *)prog + prog->startoff; + struct patstralloc patstralloc_struct; + + if (nump) { + maxnpos = *nump; + *nump = 0; + } + + if (!patstralloc) + patmungestring(&string, &stringlen, &unmetalenin); + origlen = stringlen; + + if (patstralloc) { + DPUTS(!patstralloc->alloced, + "External unmetafy didn't actually unmetafy."); + DPUTS(patstralloc->unmetalenp, + "Ooh-err: pathpos with external unmetafy. I have bad vibes."); + patinpath = NULL; patinstart = string; - tryalloced = patinpath = NULL; + /* stringlen is unmetafied length; unmetalenin is ignored */ + } else { + patstralloc = &patstralloc_struct; + if (patallocstr(prog, string, stringlen, unmetalenin, 0, patstralloc)) { + patinstart = patstralloc->alloced + patstralloc->unmetalenp; + stringlen = patstralloc->unmetalen; + } else + patinstart = string; + if (patstralloc->unmetalenp) + patinpath = patstralloc->alloced; + else + patinpath = NULL; } + patflags = prog->flags; patinend = patinstart + stringlen; /* * From now on we do not require NULL termination of @@ -2181,7 +2300,31 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, * Either we are testing against a pure string, * or we can match anything at all. */ - int ret; + int ret, pstrlen; + char *pstr; + if (patstralloc->alloced) + { + /* + * Unmetafied; we need pattern sring that's also unmetafied. + * We'll cache it in the patstralloc structure. + * Note it's on the heap. + */ + if (!patstralloc->progstrunmeta) + { + patstralloc->progstrunmeta = + dupstrpfx(progstr, (int)prog->patmlen); + unmetafy(patstralloc->progstrunmeta, + &patstralloc->progstrunmetalen); + } + pstr = patstralloc->progstrunmeta; + pstrlen = patstralloc->progstrunmetalen; + } + else + { + /* Metafied. */ + pstr = progstr; + pstrlen = (int)prog->patmlen; + } if (prog->flags & PAT_ANY) { /* * Optimisation for a single "*": always matches @@ -2193,11 +2336,11 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, * Testing a pure string. See if initial * components match. */ - int lendiff = stringlen - prog->patmlen; + int lendiff = stringlen - pstrlen; if (lendiff < 0) { /* No, the pattern string is too long. */ ret = 0; - } else if (!memcmp(progstr, patinstart, prog->patmlen)) { + } else if (!memcmp(pstr, patinstart, pstrlen)) { /* * Initial component matches. Matches either * if lengths are the same or we are not anchored @@ -2219,24 +2362,36 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, } else { /* * Remember the length in case used for ${..#..} etc. - * In this case, we didn't unmetafy the string. + * In this case, we didn't unmetafy the pattern string + * In the orignal structure, but it might be unmetafied + * for use with an unmetafied test string. */ - patinlen = (int)prog->patmlen; + patinlen = pstrlen; /* if matching files, must update globbing flags */ patglobflags = prog->globend; if ((patglobflags & GF_MATCHREF) && !(patflags & PAT_FILE)) { - char *str = ztrduppfx(patinstart, patinlen); + char *str; int mlen; - /* - * Count the characters. We're not using CHARSUB() - * because the string is still metafied. - */ - MB_METACHARINIT(); - mlen = MB_METASTRLEN2END(patinstart, 0, - patinstart + patinlen); + if (patstralloc->alloced) { + /* + * Unmetafied: pstrlen contains unmetafied + * length in bytes. + */ + str = metafy(patinstart, pstrlen, META_DUP); + mlen = CHARSUB(patinstart, patinstart + pstrlen); + } else { + str = ztrduppfx(patinstart, patinlen); + /* + * Count the characters. We're not using CHARSUB() + * because the string is still metafied. + */ + MB_METACHARINIT(); + mlen = MB_METASTRLEN2END(patinstart, 0, + patinstart + patinlen); + } setsparam("MATCH", str); setiparam("MBEGIN", @@ -2248,9 +2403,6 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, } } - if (tryalloced) - zfree(tryalloced, unmetalen + unmetalenp); - return ret; } else { int q = queue_signal_level(); @@ -2287,8 +2439,6 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, } } if (!ret) { - if (tryalloced) - zfree(tryalloced, unmetalen + unmetalenp); return 0; } @@ -2320,8 +2470,11 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, /* * Optimization: if we didn't find any Meta characters * to begin with, we don't need to look for them now. + * + * For patstralloc pased in, we want the unmetafied length. */ - if (unmetalen != origlen) { + if (patstralloc == &patstralloc_struct && + patstralloc->unmetalen != origlen) { for (ptr = patinstart; ptr < patinput; ptr++) if (imeta(*ptr)) patinlen++; @@ -2442,16 +2595,15 @@ pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen, restore_queue_signals(q); - if (tryalloced) - zfree(tryalloced, unmetalen + unmetalenp); - return ret; } } /* * Return length of previous succesful match. This is - * in metafied bytes, i.e. includes a count of Meta characters. + * in metafied bytes, i.e. includes a count of Meta characters, + * unless the match was done on an unmetafied string using + * a patstralloc stuct, in which case it, too is unmetafed. * Unusual and futile attempt at modular encapsulation. */ @@ -2641,19 +2793,30 @@ patmatch(Upat prog) start = compend = patinput; comp = 0; while (patinput < patinend && idigit(*patinput)) { - if (comp) - comp *= 10; - comp += *patinput - '0'; + int out_of_range = 0; + int digit = *patinput - '0'; + if (comp > ZRANGE_MAX / (zlong)10) { + out_of_range = 1; + } else { + zrange_t c10 = comp ? comp * 10 : 0; + if (ZRANGE_MAX - c10 < digit) { + out_of_range = 1; + } else { + comp = c10; + comp += digit; + } + } patinput++; compend++; - if (comp & ((zrange_t)1 << (sizeof(comp)*8 - + if (out_of_range || + (comp & ((zrange_t)1 << (sizeof(comp)*8 - #ifdef ZRANGE_T_IS_SIGNED 2 #else 1 #endif - ))) { + )))) { /* * Out of range (allowing for signedness, which * we need if we are using zlongs). @@ -3213,6 +3376,7 @@ patmatch(Upat prog) scan[P_CT_CURRENT].l = cur + 1; if (patmatch(scan + P_CT_OPERAND)) return 1; + scan[P_CT_CURRENT].l = cur; patinput = patinput_thistime; } if (cur < min) |