From a73c705b0c864a9ce042fca6e72e0c92d4ad8237 Mon Sep 17 00:00:00 2001 From: Oliver Kiddle Date: Fri, 16 Dec 2022 23:22:33 +0100 Subject: 51212: remove STOUC() macro This served as a workaround for ancient compilers where casts to unsigned char were broken. --- Src/pattern.c | 81 ++++++++++++++++++++++++++++++----------------------------- 1 file changed, 41 insertions(+), 40 deletions(-) (limited to 'Src/pattern.c') diff --git a/Src/pattern.c b/Src/pattern.c index e947d1216..3edda1772 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -239,7 +239,7 @@ typedef unsigned long zrange_t; * a bit tricky... */ #define WCHAR_INVALID(ch) \ - ((wchar_t) (0xDC00 + STOUC(ch))) + ((wchar_t) (0xDC00 + (unsigned char) ch)) #endif /* MULTIBYTE_SUPPORT */ /* @@ -346,7 +346,7 @@ metacharinc(char **x) * set doesn't have the property that all bytes with the 8th * bit clear are single characters then we are stuffed. */ - if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*inptr) & 0x80)) + if (!(patglobflags & GF_MULTIBYTE) || !((unsigned char) *inptr & 0x80)) { if (itok(*inptr)) inchar = ztokens[*inptr++ - Pound]; @@ -357,7 +357,7 @@ metacharinc(char **x) inchar = *inptr++; } *x = inptr; - return (wchar_t)STOUC(inchar); + return (wchar_t)(unsigned char) inchar; } while (*inptr) { @@ -1181,8 +1181,8 @@ pattern_range_to_string(char *rangestr, char *outstr) int len = 0; while (*rangestr) { - if (imeta(STOUC(*rangestr))) { - int swtype = STOUC(*rangestr) - STOUC(Meta); + if (imeta((unsigned char) *rangestr)) { + int swtype = (unsigned char) *rangestr - (unsigned char) Meta; if (swtype == 0) { /* Ordindary metafied character */ @@ -1278,17 +1278,17 @@ patcomppiece(int *flagp, int paren) kshchar = '\0'; if (*patparse && patparse[1] == Inpar) { if (*patparse == zpc_special[ZPC_KSH_PLUS]) - kshchar = STOUC('+'); + kshchar = (unsigned char) '+'; else if (*patparse == zpc_special[ZPC_KSH_BANG]) - kshchar = STOUC('!'); + kshchar = (unsigned char) '!'; else if (*patparse == zpc_special[ZPC_KSH_BANG2]) 
- kshchar = STOUC('!'); + kshchar = (unsigned char) '!'; else if (*patparse == zpc_special[ZPC_KSH_AT]) - kshchar = STOUC('@'); + kshchar = (unsigned char) '@'; else if (*patparse == zpc_special[ZPC_KSH_STAR]) - kshchar = STOUC('*'); + kshchar = (unsigned char) '*'; else if (*patparse == zpc_special[ZPC_KSH_QUEST]) - kshchar = STOUC('?'); + kshchar = (unsigned char) '?'; } /* @@ -1468,7 +1468,8 @@ patcomppiece(int *flagp, int paren) ch = range_type(patparse, len); patparse = nptr + 2; if (ch != PP_UNKWN) - patadd(NULL, STOUC(Meta) + ch, 1, PA_NOALIGN); + patadd(NULL, (unsigned char) Meta + ch, 1, + PA_NOALIGN); continue; } charstart = patparse; @@ -1476,10 +1477,10 @@ patcomppiece(int *flagp, int paren) if (*patparse == Dash && patparse[1] && patparse[1] != Outbrack) { - patadd(NULL, STOUC(Meta)+PP_RANGE, 1, PA_NOALIGN); + patadd(NULL, (unsigned char) Meta+PP_RANGE, 1, PA_NOALIGN); if (itok(*charstart)) { - patadd(0, STOUC(ztokens[*charstart - Pound]), 1, - PA_NOALIGN); + patadd(0, (unsigned char) ztokens[*charstart - Pound], + 1, PA_NOALIGN); } else { patadd(charstart, 0, patparse-charstart, PA_NOALIGN); } @@ -1487,7 +1488,7 @@ patcomppiece(int *flagp, int paren) METACHARINC(patparse); } if (itok(*charstart)) { - patadd(0, STOUC(ztokens[*charstart - Pound]), 1, + patadd(0, (unsigned char) ztokens[*charstart - Pound], 1, PA_NOALIGN); } else { patadd(charstart, 0, patparse-charstart, PA_NOALIGN); @@ -1910,8 +1911,8 @@ charref(char *x, char *y, int *zmb_ind) wchar_t wc; size_t ret; - if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*x) & 0x80)) - return (wchar_t) STOUC(*x); + if (!(patglobflags & GF_MULTIBYTE) || !((unsigned char) *x & 0x80)) + return (wchar_t) (unsigned char) *x; ret = mbrtowc(&wc, x, y-x, &shiftstate); @@ -1937,7 +1938,7 @@ charnext(char *x, char *y) wchar_t wc; size_t ret; - if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*x) & 0x80)) + if (!(patglobflags & GF_MULTIBYTE) || !((unsigned char) *x & 0x80)) return x + 1; ret = mbrtowc(&wc, x, y-x, 
&shiftstate); @@ -1965,8 +1966,8 @@ charrefinc(char **x, char *y, int *z) wchar_t wc; size_t ret; - if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(**x) & 0x80)) - return (wchar_t) STOUC(*(*x)++); + if (!(patglobflags & GF_MULTIBYTE) || !((unsigned char) **x & 0x80)) + return (wchar_t) (unsigned char) *(*x)++; ret = mbrtowc(&wc, *x, y-*x, &shiftstate); @@ -2025,13 +2026,13 @@ charsub(char *x, char *y) #else /* no MULTIBYTE_SUPPORT */ /* Get a character from the start point in a string */ -#define CHARREF(x, y) (STOUC(*(x))) +#define CHARREF(x, y) ((unsigned char) (*(x))) /* Get a pointer to the next character */ #define CHARNEXT(x, y) ((x)+1) /* Increment a pointer past the current character. */ #define CHARINC(x, y) ((x)++) /* Get a character and increment */ -#define CHARREFINC(x, y, z) (STOUC(*(x)++)) +#define CHARREFINC(x, y, z) ((unsigned char) (*(x)++)) /* Counter the number of characters between two pointers, smaller first */ #define CHARSUB(x,y) ((y) - (x)) @@ -2890,7 +2891,7 @@ patmatch(Upat prog) } if (!no && P_OP(next) == P_EXACTLY && (!P_LS_LEN(next) || - !idigit(STOUC(*P_LS_STR(next)))) && + !idigit((unsigned char) (*P_LS_STR(next)))) && !(patglobflags & 0xff)) return 0; patinput = --save; @@ -3600,8 +3601,8 @@ mb_patmatchrange(char *range, wchar_t ch, int zmb_ind, wint_t *indptr, int *mtp) * ranges specially. 
*/ while (*range) { - if (imeta(STOUC(*range))) { - int swtype = STOUC(*range++) - STOUC(Meta); + if (imeta((unsigned char) *range)) { + int swtype = (unsigned char) *range++ - (unsigned char) Meta; if (mtp) *mtp = swtype; switch (swtype) { @@ -3753,8 +3754,8 @@ mb_patmatchindex(char *range, wint_t ind, wint_t *chr, int *mtp) *mtp = 0; while (*range) { - if (imeta(STOUC(*range))) { - int swtype = STOUC(*range++) - STOUC(Meta); + if (imeta((unsigned char) *range)) { + int swtype = (unsigned char) *range++ - (unsigned char) Meta; switch (swtype) { case 0: range--; @@ -3845,13 +3846,13 @@ patmatchrange(char *range, int ch, int *indptr, int *mtp) * ranges specially. */ for (; *range; range++) { - if (imeta(STOUC(*range))) { - int swtype = STOUC(*range) - STOUC(Meta); + if (imeta((unsigned char) *range)) { + int swtype = (unsigned char) *range - (unsigned char) Meta; if (mtp) *mtp = swtype; switch (swtype) { case 0: - if (STOUC(*++range ^ 32) == ch) + if ((unsigned char) (*++range ^ 32) == ch) return 1; break; case PP_ALPHA: @@ -3931,9 +3932,9 @@ patmatchrange(char *range, int ch, int *indptr, int *mtp) break; case PP_RANGE: range++; - r1 = STOUC(UNMETA(range)); + r1 = (unsigned char) UNMETA(range); METACHARINC(range); - r2 = STOUC(UNMETA(range)); + r2 = (unsigned char) UNMETA(range); if (*range == Meta) range++; if (r1 <= ch && ch <= r2) { @@ -3955,7 +3956,7 @@ patmatchrange(char *range, int ch, int *indptr, int *mtp) DPUTS(1, "BUG: unknown metacharacter in range."); break; } - } else if (STOUC(*range) == ch) { + } else if ((unsigned char) *range == ch) { if (mtp) *mtp = 0; return 1; @@ -3989,12 +3990,12 @@ patmatchindex(char *range, int ind, int *chr, int *mtp) *mtp = 0; for (; *range; range++) { - if (imeta(STOUC(*range))) { - int swtype = STOUC(*range) - STOUC(Meta); + if (imeta((unsigned char) *range)) { + int swtype = (unsigned char) *range - (unsigned char) Meta; switch (swtype) { case 0: /* ordinary metafied character */ - rchr = STOUC(*++range) ^ 32; + rchr = 
(unsigned char) *++range ^ 32; if (!ind) { *chr = rchr; return 1; @@ -4028,9 +4029,9 @@ patmatchindex(char *range, int ind, int *chr, int *mtp) case PP_RANGE: range++; - r1 = STOUC(UNMETA(range)); + r1 = (unsigned char) UNMETA(range); METACHARINC(range); - r2 = STOUC(UNMETA(range)); + r2 = (unsigned char) UNMETA(range); if (*range == Meta) range++; rdiff = r2 - r1; @@ -4050,7 +4051,7 @@ patmatchindex(char *range, int ind, int *chr, int *mtp) } } else { if (!ind) { - *chr = STOUC(*range); + *chr = (unsigned char) *range; return 1; } } -- cgit v1.2.3 From aa8e4a02904b3a1c4b3064eb7502d887f7de958b Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Tue, 1 Aug 2023 14:32:55 +0100 Subject: 52008: Pattern bug with branches + exclusion Add tests. --- ChangeLog | 5 +++++ Src/pattern.c | 22 ++++++++++++++++++++-- Test/D02glob.ztst | 26 ++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 2 deletions(-) (limited to 'Src/pattern.c') diff --git a/ChangeLog b/ChangeLog index 372092a32..8e6e3fb18 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2023-08-01 Peter Stephenson + + * 52008: Src/pattern.c, Test/D02glob.ztst: Fix bug with branches + in patterns followed by an exclusion, and add tests. + 2023-07-31 dana * github #100: HexorCatZ: Completion/Unix/Command/_qemu: diff --git a/Src/pattern.c b/Src/pattern.c index 3edda1772..2a1a514fb 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -2987,14 +2987,15 @@ patmatch(Upat prog) case P_EXCSYNC: /* See the P_EXCLUDE code below for where syncptr comes from */ { - unsigned char *syncptr; + unsigned char *syncstart, *syncptr, *ptr; Upat after; after = P_OPERAND(scan); DPUTS(!P_ISEXCLUDE(after), "BUG: EXCSYNC not followed by EXCLUDE."); DPUTS(!P_OPERAND(after)->p, "BUG: EXCSYNC not handled by EXCLUDE"); - syncptr = P_OPERAND(after)->p + (patinput - patinstart); + syncstart = P_OPERAND(after)->p; + syncptr = syncstart + (patinput - patinstart); /* * If we already matched from here, this time we fail. 
* See WBRANCH code for story about error count. @@ -3009,6 +3010,23 @@ patmatch(Upat prog) * failed anyway. */ *syncptr = errsfound + 1; + /* + * Because of backtracking, any match before this point + * can't apply to the current branch we're on so is now + * a failure --- this can happen if, on a previous + * branch, we initially marked a success before failing + * on a later part of the pattern after marking up the + * P_EXCSYNC (even an end anchor will have this effect). + * To make sure we record the current match point + * correctly, mark those down now. + * + * This might have side effects on the efficiency of + * pathological cases involving nested branches. To + * fix that we'd probably need to record matches on + * different branches separately. + */ + for (ptr = syncstart; ptr < syncptr; ++ptr) + *ptr = 0; } break; case P_EXCEND: diff --git a/Test/D02glob.ztst b/Test/D02glob.ztst index 850a535e5..4d88e5c27 100644 --- a/Test/D02glob.ztst +++ b/Test/D02glob.ztst @@ -817,6 +817,32 @@ *>*/glob.tmp/(flip|flop) *>*/glob.tmp/(flip|flop)/trailing/components +# The following set test an obscure problem with branches followed by +# exclusions that shows up when the exclusion matches against +# something other than the complete test string, hence the complicated +# double negative. + [[ ab = (|a*)~^(*b) ]] +0:Regression test for exclusion after branches: empty first alternative + + [[ ab = (b|a*)~^(*b) ]] +0:Regression test for exclusion after branches: non-empty first alternative + + [[ ab = (b*|a*)~^(*b) ]] +0:Regression test for exclusion after branches: full length first alternative + +# Corresponding tests where the exclusion should succeed, so the +# match fails. It's hard to know how to provoke bugs here... 
+ [[ abc = (|a*)~^(*b) ]] +1:Regression test for exclusion after branches: failure case 1 + + [[ abc = (b|a*)~^(*b) ]] +1:Regression test for exclusion after branches: failure case 2 + + [[ abc = (b*|a*)~^(*b) ]] +1:Regression test for exclusion after branches: failure case 3 + +# Careful: extendedglob off from this point. + unsetopt extendedglob print -r -- ${(*)=${(@s.+.):-A+B}/(#b)(?)/-${(L)match[1]} ${match[1]}} 0:the '*' qualfier enables extended_glob for pattern matching -- cgit v1.2.3 From ce8909b49428e260c15dce22d764f2831295645a Mon Sep 17 00:00:00 2001 From: Bart Schaefer Date: Sat, 3 Feb 2024 19:52:39 -0800 Subject: unposted: Record as comments some notes about namespace usage exceptions. --- ChangeLog | 3 +++ Src/Zle/zle_tricky.c | 5 ++++- Src/parse.c | 2 ++ Src/pattern.c | 1 + 4 files changed, 10 insertions(+), 1 deletion(-) (limited to 'Src/pattern.c') diff --git a/ChangeLog b/ChangeLog index fbfae8589..386ef3ab9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ 2024-02-03 Bart Schaefer + * unposted: Src/Zle/zle_tricky.c, Src/parse.c, Src/pattern.c: + Record as comments some notes about namespace usage exceptions. + * 52513: Doc/Zsh/mod_private.yo, Src/Modules/param_private.c, Src/params.c, Test/v10private.ztst: nofork substitutions can use private parameters; fix crash bug on {privateFD}>&N; add diff --git a/Src/Zle/zle_tricky.c b/Src/Zle/zle_tricky.c index 6ceb5d87f..ea2a52390 100644 --- a/Src/Zle/zle_tricky.c +++ b/Src/Zle/zle_tricky.c @@ -1499,6 +1499,7 @@ get_comp_string(void) if (varq) tt = clwords[clwpos]; + /* The only place we complete namespaces, see IIDENT below */ s = itype_end(tt, INAMESPC, 0); sav = *s; *s = '\0'; @@ -1570,6 +1571,8 @@ get_comp_string(void) i = 0; MB_METACHARINIT(); + /* All further uses of IIDENT in this file should change to * + * INAMESPC if this case is changed. Too ugly to risk now. 
*/ if (itype_end(s, IIDENT, 1) == s) nnb = s + MB_METACHARLEN(s); else @@ -1643,7 +1646,7 @@ get_comp_string(void) } else { /* In mathematical expression, we complete parameter names * * (even if they don't have a `$' in front of them). So we * - * have to find that name. */ + * have to find that name. See above regarding INAMESPC */ char *cspos = zlemetaline + zlemetacs, *wptr, *cptr; we = itype_end(cspos, IIDENT, 0) - zlemetaline; diff --git a/Src/parse.c b/Src/parse.c index 859f4d0fc..2b7e003fc 100644 --- a/Src/parse.c +++ b/Src/parse.c @@ -1935,6 +1935,8 @@ par_simple(int *cmplx, int nr) if (*ptr == Outbrace && ptr > tokstr + 1) { + /* Should we allow namespace FDs, {.foo.bar}>&file ? * + * If so, change IIDENT to INAMESPC here */ if (itype_end(tokstr+1, IIDENT, 0) >= ptr) { char *toksave = tokstr; diff --git a/Src/pattern.c b/Src/pattern.c index 2a1a514fb..1e0ae88d9 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -3691,6 +3691,7 @@ mb_patmatchrange(char *range, wchar_t ch, int zmb_ind, wint_t *indptr, int *mtp) return 1; break; case PP_IDENT: + /* Could use INAMESPC here? */ if (wcsitype(ch, IIDENT)) return 1; break; -- cgit v1.2.3