From 1b421e4978440234fb73117c8505dad1ccc68d46 Mon Sep 17 00:00:00 2001 From: Jun-ichi Takimoto Date: Mon, 26 Sep 2022 10:52:50 +0900 Subject: 50658 + test: Enable to switch between C/UTF-8 locales in PCRE --- Src/Modules/pcre.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'Src/Modules/pcre.c') diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c index 6289e003e..46875a59b 100644 --- a/Src/Modules/pcre.c +++ b/Src/Modules/pcre.c @@ -47,8 +47,6 @@ zpcre_utf8_enabled(void) #if defined(MULTIBYTE_SUPPORT) && defined(HAVE_NL_LANGINFO) && defined(CODESET) static int have_utf8_pcre = -1; - /* value can toggle based on MULTIBYTE, so don't - * be too eager with caching */ if (have_utf8_pcre < -1) return 0; @@ -56,15 +54,11 @@ zpcre_utf8_enabled(void) return 0; if ((have_utf8_pcre == -1) && - (!strcmp(nl_langinfo(CODESET), "UTF-8"))) { - - if (pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre)) + (pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre))) { have_utf8_pcre = -2; /* erk, failed to ask */ } - if (have_utf8_pcre < 0) - return 0; - return have_utf8_pcre; + return (have_utf8_pcre == 1) && (!strcmp(nl_langinfo(CODESET), "UTF-8")); #else return 0; -- cgit v1.2.3 From b62e911341c8ec7446378b477c47da4256053dc0 Mon Sep 17 00:00:00 2001 From: Oliver Kiddle Date: Sat, 13 May 2023 00:53:32 +0200 Subject: 51723: migrate pcre module to pcre2 --- ChangeLog | 3 + Src/Modules/pcre.c | 223 +++++++++++++++++++++-------------------------------- Test/V07pcre.ztst | 13 +++- configure.ac | 20 +++-- 4 files changed, 110 insertions(+), 149 deletions(-) (limited to 'Src/Modules/pcre.c') diff --git a/ChangeLog b/ChangeLog index f5c77f801..285b73b2c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ 2023-05-13 Oliver Kiddle + * 51723: Src/Modules/pcre.c, Test/V07pcre.ztst, configure.ac: + migrate pcre module to pcre2 + * Felipe Contreras: 50612: Misc/vcs_info-examples: fix typo * github #98: Vidhan Bhatt: Completion/Darwin/Command/_shortcuts: diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c index 46875a59b..079ecc2c5 100644 --- a/Src/Modules/pcre.c +++ b/Src/Modules/pcre.c @@ -34,11 +34,11 @@ #define CPCRE_PLAIN 0 /**/ -#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC) -#include +#if defined(HAVE_PCRE2_COMPILE_8) && defined(HAVE_PCRE2_H) +#define PCRE2_CODE_UNIT_WIDTH 8 +#include -static pcre *pcre_pattern; -static pcre_extra *pcre_hints; +static pcre2_code *pcre_pattern; /**/ static int @@ -54,8 +54,8 @@ zpcre_utf8_enabled(void) return 0; if ((have_utf8_pcre == -1) && - (pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre))) { - have_utf8_pcre = -2; /* erk, failed to ask */ + (pcre2_config(PCRE2_CONFIG_UNICODE, &have_utf8_pcre))) { + have_utf8_pcre = -2; /* erk, failed to ask */ } return (have_utf8_pcre == 1) && (!strcmp(nl_langinfo(CODESET), "UTF-8")); @@ -69,115 +69,87 @@ zpcre_utf8_enabled(void) static int bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func)) { - int pcre_opts = 0, pcre_errptr, target_len; - const char *pcre_error; + uint32_t pcre_opts = 0; + int target_len; + int pcre_error; + PCRE2_SIZE pcre_offset; char *target; - if(OPT_ISSET(ops,'a')) pcre_opts |= PCRE_ANCHORED; - if(OPT_ISSET(ops,'i')) pcre_opts |= PCRE_CASELESS; - if(OPT_ISSET(ops,'m')) pcre_opts |= PCRE_MULTILINE; - if(OPT_ISSET(ops,'x')) pcre_opts |= PCRE_EXTENDED; - if(OPT_ISSET(ops,'s')) pcre_opts |= PCRE_DOTALL; + if (OPT_ISSET(ops, 'a')) pcre_opts |= PCRE2_ANCHORED; + if (OPT_ISSET(ops, 'i')) pcre_opts |= PCRE2_CASELESS; + if (OPT_ISSET(ops, 'm')) pcre_opts |= PCRE2_MULTILINE; + if (OPT_ISSET(ops, 'x')) pcre_opts |= PCRE2_EXTENDED; + if (OPT_ISSET(ops, 's')) pcre_opts |= PCRE2_DOTALL; if (zpcre_utf8_enabled()) - pcre_opts |= PCRE_UTF8; - -#ifdef HAVE_PCRE_STUDY - if (pcre_hints) -#ifdef PCRE_CONFIG_JIT - pcre_free_study(pcre_hints); -#else - pcre_free(pcre_hints); -#endif - pcre_hints = NULL; -#endif + pcre_opts |= PCRE2_UTF; if (pcre_pattern) - pcre_free(pcre_pattern); + pcre2_code_free(pcre_pattern); pcre_pattern = NULL; target = ztrdup(*args); unmetafy(target, &target_len); - if ((int)strlen(target) != target_len) { - zwarnnam(nam, "embedded NULs in PCRE pattern terminate pattern"); - } - - pcre_pattern = pcre_compile(target, pcre_opts, &pcre_error, &pcre_errptr, NULL); + pcre_pattern = pcre2_compile((PCRE2_SPTR) target, (PCRE2_SIZE) target_len, + pcre_opts, &pcre_error, &pcre_offset, NULL); free(target); if (pcre_pattern == NULL) { - zwarnnam(nam, "error in regex: %s", pcre_error); + PCRE2_UCHAR buffer[256]; + pcre2_get_error_message(pcre_error, buffer, sizeof(buffer)); + zwarnnam(nam, "error in regex: %s", buffer); return 1; } return 0; } -/**/ -#ifdef HAVE_PCRE_STUDY - /**/ static int bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int func)) { - const char *pcre_error; - if (pcre_pattern == NULL) { zwarnnam(nam, "no pattern has been compiled for study"); return 1; } - - if (pcre_hints) -#ifdef PCRE_CONFIG_JIT - pcre_free_study(pcre_hints); -#else - pcre_free(pcre_hints); -#endif - pcre_hints = NULL; - pcre_hints = pcre_study(pcre_pattern, 0, &pcre_error); - if (pcre_error != NULL) - { - zwarnnam(nam, "error while studying regex: %s", pcre_error); - return 1; + int jit = 0; + if (!pcre2_config(PCRE2_CONFIG_JIT, &jit) && jit) { + if (pcre2_jit_compile(pcre_pattern, PCRE2_JIT_COMPLETE) < 0) { + zwarnnam(nam, "error while studying regex"); + return 1; + } } return 0; } -/**/ -#else /* !HAVE_PCRE_STUDY */ - -# define bin_pcre_study bin_notavail - -/**/ -#endif /* !HAVE_PCRE_STUDY */ - -/**/ static int -zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar, - char *substravar, int want_offset_pair, int matchedinarr, - int want_begin_end) +zpcre_get_substrings(char *arg, pcre2_match_data *mdata, int captured_count, + char *matchvar, char *substravar, int want_offset_pair, + int matchedinarr, int want_begin_end) { - char **captures, *match_all, **matches; + PCRE2_SIZE *ovec; + char *match_all, **matches; char offset_all[50]; int capture_start = 1; if (matchedinarr) { - /* bash-style captures[0] entire-matched string in the array */ + /* bash-style ovec[0] entire-matched string in the array */ capture_start = 0; } - /* captures[0] will be entire matched string, [1] first substring */ - if (!pcre_get_substring_list(arg, ovec, captured_count, (const char ***)&captures)) { - int nelem = arrlen(captures)-1; + /* ovec[0] will be entire matched string, [1] first substring */ + ovec = pcre2_get_ovector_pointer(mdata); + if (ovec) { + int nelem = captured_count - 1; /* Set to the offsets of the complete match */ if (want_offset_pair) { - sprintf(offset_all, "%d %d", ovec[0], ovec[1]); + sprintf(offset_all, "%ld %ld", ovec[0], ovec[1]); setsparam("ZPCRE_OP", ztrdup(offset_all)); } /* @@ -186,7 +158,7 @@ zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar, * ovec is length 2*(1+capture_list_length) */ if (matchvar) { - match_all = metafy(captures[0], ovec[1] - ovec[0], META_DUP); + match_all = metafy(arg + ovec[0], ovec[1] - ovec[0], META_DUP); setsparam(matchvar, match_all); } /* @@ -201,16 +173,12 @@ zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar, */ if (substravar && (!want_begin_end || nelem)) { - char **x, **y; + char **x; int vec_off, i; - y = &captures[capture_start]; matches = x = (char **) zalloc(sizeof(char *) * (captured_count+1-capture_start)); - for (i = capture_start; i < captured_count; i++, y++) { + for (i = capture_start; i < captured_count; i++) { vec_off = 2*i; - if (*y) - *x++ = metafy(*y, ovec[vec_off+1]-ovec[vec_off], META_DUP); - else - *x++ = NULL; + *x++ = metafy(arg + ovec[vec_off], ovec[vec_off+1]-ovec[vec_off], META_DUP); } *x = NULL; setaparam(substravar, matches); @@ -247,7 +215,8 @@ zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar, setiparam("MEND", offs + !isset(KSHARRAYS) - 1); if (nelem) { char **mbegin, **mend, **bptr, **eptr; - int i, *ipair; + int i; + size_t *ipair; bptr = mbegin = zalloc(sizeof(char*)*(nelem+1)); eptr = mend = zalloc(sizeof(char*)*(nelem+1)); @@ -287,8 +256,6 @@ zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar, setaparam("mend", mend); } } - - pcre_free_substring_list((const char **)captures); } return 0; @@ -314,7 +281,8 @@ getposint(char *instr, char *nam) static int bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) { - int ret, capcount, *ovec, ovecsize, c; + int ret, c; + pcre2_match_data *pcre_mdata = NULL; char *matched_portion = NULL; char *plaintext = NULL; char *receptacle = NULL; @@ -344,36 +312,30 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) /* For the entire match, 'Return' the offset byte positions instead of the matched string */ if(OPT_ISSET(ops,'b')) want_offset_pair = 1; - if ((ret = pcre_fullinfo(pcre_pattern, pcre_hints, PCRE_INFO_CAPTURECOUNT, &capcount))) - { - zwarnnam(nam, "error %d in fullinfo", ret); - return 1; - } - - ovecsize = (capcount+1)*3; - ovec = zalloc(ovecsize*sizeof(int)); - plaintext = ztrdup(*args); unmetafy(plaintext, &subject_len); if (offset_start > 0 && offset_start >= subject_len) - ret = PCRE_ERROR_NOMATCH; - else - ret = pcre_exec(pcre_pattern, pcre_hints, plaintext, subject_len, offset_start, 0, ovec, ovecsize); + ret = PCRE2_ERROR_NOMATCH; + else { + pcre_mdata = pcre2_match_data_create_from_pattern(pcre_pattern, NULL); + ret = pcre2_match(pcre_pattern, (PCRE2_SPTR) plaintext, subject_len, + offset_start, 0, pcre_mdata, NULL); + } if (ret==0) return_value = 0; - else if (ret==PCRE_ERROR_NOMATCH) /* no match */; + else if (ret == PCRE2_ERROR_NOMATCH) /* no match */; else if (ret>0) { - zpcre_get_substrings(plaintext, ovec, ret, matched_portion, receptacle, + zpcre_get_substrings(plaintext, pcre_mdata, ret, matched_portion, receptacle, want_offset_pair, 0, 0); return_value = 0; } else { - zwarnnam(nam, "error in pcre_exec [%d]", ret); + zwarnnam(nam, "error in pcre2_match [%d]", ret); } - if (ovec) - zfree(ovec, ovecsize*sizeof(int)); + if (pcre_mdata) + pcre2_match_data_free(pcre_mdata); zsfree(plaintext); return return_value; @@ -383,17 +345,19 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) static int cond_pcre_match(char **a, int id) { - pcre *pcre_pat; - const char *pcre_err; + pcre2_code *pcre_pat = NULL; + int pcre_err; + PCRE2_SIZE pcre_erroff; char *lhstr, *rhre, *lhstr_plain, *rhre_plain, *avar, *svar; - int r = 0, pcre_opts = 0, pcre_errptr, capcnt, *ov, ovsize; + int r = 0, pcre_opts = 0; + pcre2_match_data *pcre_mdata = NULL; int lhstr_plain_len, rhre_plain_len; int return_value = 0; if (zpcre_utf8_enabled()) - pcre_opts |= PCRE_UTF8; + pcre_opts |= PCRE2_UTF; if (isset(REMATCHPCRE) && !isset(CASEMATCH)) - pcre_opts |= PCRE_CASELESS; + pcre_opts |= PCRE2_CASELESS; lhstr = cond_str(a,0,0); rhre = cond_str(a,1,0); @@ -401,9 +365,6 @@ cond_pcre_match(char **a, int id) rhre_plain = ztrdup(rhre); unmetafy(lhstr_plain, &lhstr_plain_len); unmetafy(rhre_plain, &rhre_plain_len); - pcre_pat = NULL; - ov = NULL; - ovsize = 0; if (isset(BASHREMATCH)) { svar = NULL; @@ -415,27 +376,27 @@ cond_pcre_match(char **a, int id) switch(id) { case CPCRE_PLAIN: - if ((int)strlen(rhre_plain) != rhre_plain_len) { - zwarn("embedded NULs in PCRE pattern terminate pattern"); - } - pcre_pat = pcre_compile(rhre_plain, pcre_opts, &pcre_err, &pcre_errptr, NULL); - if (pcre_pat == NULL) { - zwarn("failed to compile regexp /%s/: %s", rhre, pcre_err); + if (!(pcre_pat = pcre2_compile((PCRE2_SPTR) rhre_plain, + (PCRE2_SIZE) rhre_plain_len, pcre_opts, + &pcre_err, &pcre_erroff, NULL))) + { + PCRE2_UCHAR buffer[256]; + pcre2_get_error_message(pcre_err, buffer, sizeof(buffer)); + zwarn("failed to compile regexp /%s/: %s", rhre, buffer); break; } - pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt); - ovsize = (capcnt+1)*3; - ov = zalloc(ovsize*sizeof(int)); - r = pcre_exec(pcre_pat, NULL, lhstr_plain, lhstr_plain_len, 0, 0, ov, ovsize); - /* r < 0 => error; r==0 match but not enough size in ov + pcre_mdata = pcre2_match_data_create_from_pattern(pcre_pat, NULL); + r = pcre2_match(pcre_pat, (PCRE2_SPTR8) lhstr_plain, lhstr_plain_len, + 0, 0, pcre_mdata, NULL); + /* r < 0 => error; r==0 match but not enough size in match data * r > 0 => (r-1) substrings found; r==1 => no substrings */ if (r==0) { - zwarn("reportable zsh problem: pcre_exec() returned 0"); + zwarn("reportable zsh problem: pcre2_match() returned 0"); return_value = 1; break; } - else if (r==PCRE_ERROR_NOMATCH) { + else if (r == PCRE2_ERROR_NOMATCH) { return_value = 0; /* no match */ break; } @@ -444,7 +405,7 @@ cond_pcre_match(char **a, int id) break; } else if (r>0) { - zpcre_get_substrings(lhstr_plain, ov, r, svar, avar, 0, + zpcre_get_substrings(lhstr_plain, pcre_mdata, r, svar, avar, 0, isset(BASHREMATCH), !isset(BASHREMATCH)); return_value = 1; @@ -457,10 +418,10 @@ cond_pcre_match(char **a, int id) free(lhstr_plain); if(rhre_plain) free(rhre_plain); + if (pcre_mdata) + pcre2_match_data_free(pcre_mdata); if (pcre_pat) - pcre_free(pcre_pat); - if (ov) - zfree(ov, ovsize*sizeof(int)); + pcre2_code_free(pcre_pat); return return_value; } @@ -489,11 +450,11 @@ static struct builtin bintab[] = { static struct features module_features = { bintab, sizeof(bintab)/sizeof(*bintab), -#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC) +#if defined(HAVE_PCRE2_COMPILE_8) && defined(HAVE_PCRE2_H) cotab, sizeof(cotab)/sizeof(*cotab), -#else /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */ +#else /* !(HAVE_PCRE2_COMPILE_8 && HAVE_PCRE2_H) */ NULL, 0, -#endif /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */ +#endif /* !(HAVE_PCRE2_COMPILE_8 && HAVE_PCRE2_H) */ NULL, 0, NULL, 0, 0 @@ -540,19 +501,9 @@ cleanup_(Module m) int finish_(UNUSED(Module m)) { -#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC) -#ifdef HAVE_PCRE_STUDY - if (pcre_hints) -#ifdef PCRE_CONFIG_JIT - pcre_free_study(pcre_hints); -#else - pcre_free(pcre_hints); -#endif - pcre_hints = NULL; -#endif - +#if defined(HAVE_PCRE2_COMPILE_8) && defined(HAVE_PCRE2_H) if (pcre_pattern) - pcre_free(pcre_pattern); + pcre2_code_free(pcre_pattern); pcre_pattern = NULL; #endif diff --git a/Test/V07pcre.ztst b/Test/V07pcre.ztst index 22a0b64c7..6eb366964 100644 --- a/Test/V07pcre.ztst +++ b/Test/V07pcre.ztst @@ -117,12 +117,17 @@ >78884; ZPCRE_OP: 25 30 >90210; ZPCRE_OP: 31 36 -# Embedded NULs allowed in plaintext, but not in RE (although \0 as two-chars allowed) +# Embedded NULs allowed in plaintext, in RE, pcre supports \0 as two-chars [[ $'a\0bc\0d' =~ '^(a\0.)(.+)$' ]] print "${#MATCH}; ${#match[1]}; ${#match[2]}" 0:ensure ASCII NUL passes in and out of matched plaintext >6; 3; 3 +# PCRE2 supports NULs also in the RE + [[ $'a\0b\0c' =~ $'^(.\0)+' ]] && print "${#MATCH}; ${#match[1]}" +0:ensure ASCII NUL works also in the regex +>4; 2 + # Ensure the long-form infix operator works [[ foo -pcre-match ^f..$ ]] print $? @@ -169,7 +174,11 @@ [[ é =~ '^..\z' ]]; echo $? LANG=$LANG_SAVE [[ é =~ '^.\z' ]]; echo $? -0:swich between C/UTF-8 locales +0:switch between C/UTF-8 locales >0 >0 >0 + + [[ abc =~ 'a(d*)bc' ]] && print "$#MATCH; $#match; ${#match[1]}" +0:empty capture +>3; 1; 0 diff --git a/configure.ac b/configure.ac index d8a17791a..4710d1659 100644 --- a/configure.ac +++ b/configure.ac @@ -438,7 +438,7 @@ fi], dnl Do you want to look for pcre support? AC_ARG_ENABLE(pcre, -AS_HELP_STRING([--enable-pcre],[enable the search for the pcre library (may create run-time library dependencies)])) +AS_HELP_STRING([--enable-pcre],[enable the search for the pcre2 library (may create run-time library dependencies)])) dnl Do you want to look for capability support? AC_ARG_ENABLE(cap, @@ -652,13 +652,12 @@ AC_HEADER_SYS_WAIT oldcflags="$CFLAGS" if test x$enable_pcre = xyes; then -AC_CHECK_PROG([PCRECONF], pcre-config, pcre-config) -dnl Typically (meaning on this single RedHat 9 box in front of me) -dnl pcre-config --cflags produces a -I output which needs to go into +AC_CHECK_PROG([PCRECONF], pcre2-config, pcre2-config) +dnl pcre2-config --cflags may produce a -I output which needs to go into dnl CPPFLAGS else configure's preprocessor tests don't pick it up, dnl producing a warning. -if test "x$ac_cv_prog_PCRECONF" = xpcre-config; then - CPPFLAGS="$CPPFLAGS `pcre-config --cflags`" +if test "x$ac_cv_prog_PCRECONF" = xpcre2-config; then + CPPFLAGS="$CPPFLAGS `pcre2-config --cflags`" fi fi @@ -668,9 +667,10 @@ AC_CHECK_HEADERS(sys/time.h sys/times.h sys/select.h termcap.h termio.h \ locale.h errno.h stdio.h stdarg.h varargs.h stdlib.h \ unistd.h sys/capability.h \ utmp.h utmpx.h sys/types.h pwd.h grp.h poll.h sys/mman.h \ - netinet/in_systm.h pcre.h langinfo.h wchar.h stddef.h \ + netinet/in_systm.h langinfo.h wchar.h stddef.h \ sys/stropts.h iconv.h ncurses.h ncursesw/ncurses.h \ ncurses/ncurses.h) +AC_CHECK_HEADERS([pcre2.h],,,[#define PCRE2_CODE_UNIT_WIDTH 8]) if test x$dynamic = xyes; then AC_CHECK_HEADERS(dlfcn.h) AC_CHECK_HEADERS(dl.h) @@ -948,9 +948,7 @@ if test "x$ac_found_iconv" = "xyes"; then fi if test x$enable_pcre = xyes; then -dnl pcre-config should probably be employed here -dnl AC_SEARCH_LIBS(pcre_compile, pcre) - LIBS="`$ac_cv_prog_PCRECONF --libs` $LIBS" + LIBS="`$ac_cv_prog_PCRECONF --libs8` $LIBS" fi dnl --------------------- @@ -1313,7 +1311,7 @@ AC_CHECK_FUNCS(strftime strptime mktime timelocal \ pathconf sysconf \ tgetent tigetflag tigetnum tigetstr setupterm initscr resize_term \ getcchar setcchar waddwstr wget_wch win_wch use_default_colors \ - pcre_compile pcre_study pcre_exec \ + pcre2_compile_8 \ nl_langinfo \ erand48 open_memstream \ posix_openpt \ -- cgit v1.2.3 From f3f371deb376478176866fd770fbcf9bc0d0609f Mon Sep 17 00:00:00 2001 From: Oliver Kiddle Date: Sat, 13 May 2023 00:56:48 +0200 Subject: 51728: assign pcre named capture groups to a hash --- ChangeLog | 3 +++ Doc/Zsh/mod_pcre.yo | 10 ++++++---- Src/Modules/pcre.c | 43 +++++++++++++++++++++++++++++++++---------- Test/V07pcre.ztst | 14 ++++++++++++++ 4 files changed, 56 insertions(+), 14 deletions(-) (limited to 'Src/Modules/pcre.c') diff --git a/ChangeLog b/ChangeLog index 285b73b2c..2835a9405 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ 2023-05-13 Oliver Kiddle + * 51728: Doc/Zsh/mod_pcre.yo, Src/Modules/pcre.c, + Test/V07pcre.ztst: assign pcre named capture groups to a hash + * 51723: Src/Modules/pcre.c, Test/V07pcre.ztst, configure.ac: migrate pcre module to pcre2 diff --git a/Doc/Zsh/mod_pcre.yo b/Doc/Zsh/mod_pcre.yo index c2817f519..6d073985d 100644 --- a/Doc/Zsh/mod_pcre.yo +++ b/Doc/Zsh/mod_pcre.yo @@ -20,12 +20,12 @@ including those that indicate newline. ) findex(pcre_study) item(tt(pcre_study))( -Studies the previously-compiled PCRE which may result in faster -matching. +Requests JIT compilation for the previously-compiled PCRE which +may result in faster matching. ) findex(pcre_match) item(tt(pcre_match) [ tt(-v) var(var) ] [ tt(-a) var(arr) ] \ -[ tt(-n) var(offset) ] [ tt(-b) ] var(string))( +[ tt(-A) var(assoc) ] [ tt(-n) var(offset) ] [ tt(-b) ] var(string))( Returns successfully if tt(string) matches the previously-compiled PCRE. @@ -36,7 +36,9 @@ substrings, unless the tt(-a) option is given, in which case it will set the array var(arr). Similarly, the variable tt(MATCH) will be set to the entire matched portion of the string, unless the tt(-v) option is given, in which case the variable -var(var) will be set. +var(var) will be set. Furthermore, any named captures will +be stored in the associative array tt(.pcre.match) unless an +alternative is given with tt(-A). No variables are altered if there is no successful match. A tt(-n) option starts searching for a match from the byte var(offset) position in var(string). If the tt(-b) option is given, diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c index 079ecc2c5..6be1f76e2 100644 --- a/Src/Modules/pcre.c +++ b/Src/Modules/pcre.c @@ -129,14 +129,17 @@ bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int f } static int -zpcre_get_substrings(char *arg, pcre2_match_data *mdata, int captured_count, - char *matchvar, char *substravar, int want_offset_pair, - int matchedinarr, int want_begin_end) +zpcre_get_substrings(pcre2_code *pat, char *arg, pcre2_match_data *mdata, + int captured_count, char *matchvar, char *substravar, char *namedassoc, + int want_offset_pair, int matchedinarr, int want_begin_end) { PCRE2_SIZE *ovec; char *match_all, **matches; char offset_all[50]; int capture_start = 1; + int vec_off; + PCRE2_SPTR ntable; /* table of named captures */ + uint32_t ncount, nsize; if (matchedinarr) { /* bash-style ovec[0] entire-matched string in the array */ @@ -174,7 +177,7 @@ zpcre_get_substrings(char *arg, pcre2_match_data *mdata, int captured_count, if (substravar && (!want_begin_end || nelem)) { char **x; - int vec_off, i; + int i; matches = x = (char **) zalloc(sizeof(char *) * (captured_count+1-capture_start)); for (i = capture_start; i < captured_count; i++) { vec_off = 2*i; @@ -184,6 +187,23 @@ zpcre_get_substrings(char *arg, pcre2_match_data *mdata, int captured_count, setaparam(substravar, matches); } + if (!pcre2_pattern_info(pat, PCRE2_INFO_NAMECOUNT, &ncount) && ncount + && !pcre2_pattern_info(pat, PCRE2_INFO_NAMEENTRYSIZE, &nsize) + && !pcre2_pattern_info(pat, PCRE2_INFO_NAMETABLE, &ntable)) + { + char **hash, **hashptr; + uint32_t nidx; + hashptr = hash = (char **)zshcalloc((ncount+1)*2*sizeof(char *)); + for (nidx = 0; nidx < ncount; nidx++) { + vec_off = (ntable[nsize * nidx] << 9) + 2 * ntable[nsize * nidx + 1]; + /* would metafy the key but pcre limits characters in the name */ + *hashptr++ = ztrdup((char *) ntable + nsize * nidx + 2); + *hashptr++ = metafy(arg + ovec[vec_off], + ovec[vec_off+1]-ovec[vec_off], META_DUP); + } + sethparam(namedassoc, hash); + } + if (want_begin_end) { /* * cond-infix rather than builtin; also not bash; so we set a bunch @@ -286,6 +306,7 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) char *matched_portion = NULL; char *plaintext = NULL; char *receptacle = NULL; + char *named = ".pcre.match"; int return_value = 1; /* The subject length and offset start are both int values in pcre_exec */ int subject_len; @@ -305,6 +326,9 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) if(OPT_HASARG(ops,c='v')) { matched_portion = OPT_ARG(ops,c); } + if (OPT_HASARG(ops, c='A')) { + named = OPT_ARG(ops, c); + } if(OPT_HASARG(ops,c='n')) { /* The offset position to start the search, in bytes. */ if ((offset_start = getposint(OPT_ARG(ops,c), nam)) < 0) return 1; @@ -326,8 +350,8 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) if (ret==0) return_value = 0; else if (ret == PCRE2_ERROR_NOMATCH) /* no match */; else if (ret>0) { - zpcre_get_substrings(plaintext, pcre_mdata, ret, matched_portion, receptacle, - want_offset_pair, 0, 0); + zpcre_get_substrings(pcre_pattern, plaintext, pcre_mdata, ret, matched_portion, + receptacle, named, want_offset_pair, 0, 0); return_value = 0; } else { @@ -405,9 +429,8 @@ cond_pcre_match(char **a, int id) break; } else if (r>0) { - zpcre_get_substrings(lhstr_plain, pcre_mdata, r, svar, avar, 0, - isset(BASHREMATCH), - !isset(BASHREMATCH)); + zpcre_get_substrings(pcre_pat, lhstr_plain, pcre_mdata, r, svar, avar, + ".pcre.match", 0, isset(BASHREMATCH), !isset(BASHREMATCH)); return_value = 1; break; } @@ -443,7 +466,7 @@ static struct conddef cotab[] = { static struct builtin bintab[] = { BUILTIN("pcre_compile", 0, bin_pcre_compile, 1, 1, 0, "aimxs", NULL), - BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "a:v:n:b", NULL), + BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "A:a:v:n:b", NULL), BUILTIN("pcre_study", 0, bin_pcre_study, 0, 0, 0, NULL, NULL) }; diff --git a/Test/V07pcre.ztst b/Test/V07pcre.ztst index 6eb366964..027fea3aa 100644 --- a/Test/V07pcre.ztst +++ b/Test/V07pcre.ztst @@ -182,3 +182,17 @@ [[ abc =~ 'a(d*)bc' ]] && print "$#MATCH; $#match; ${#match[1]}" 0:empty capture >3; 1; 0 + + [[ category/name-12345 =~ '(?x)^ + (? [^/]* ) / + (? + (? \w+ ) - + (? \d+ ))$' ]] + typeset -p1 .pcre.match +0:named captures +>typeset -g -A .pcre.match=( +> [category]=category +> [name]=name +> [package]=name-12345 +> [version]=12345 +>) -- cgit v1.2.3 From b4d1c756f50909b4a13e5c8fe5f26f71e9d54f63 Mon Sep 17 00:00:00 2001 From: Oliver Kiddle Date: Sat, 13 May 2023 00:59:00 +0200 Subject: 51738: support pcre's alternative DFA matching algorithm --- ChangeLog | 3 +++ Doc/Zsh/mod_pcre.yo | 6 +++++- Src/Modules/pcre.c | 53 ++++++++++++++++++++++++++++++++++++----------------- Test/V07pcre.ztst | 5 +++++ 4 files changed, 49 insertions(+), 18 deletions(-) (limited to 'Src/Modules/pcre.c') diff --git a/ChangeLog b/ChangeLog index 2835a9405..18bc4a698 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ 2023-05-13 Oliver Kiddle + * 51738: Doc/Zsh/mod_pcre.yo, Src/Modules/pcre.c, + Test/V07pcre.ztst: support pcre's DFA matching algorithm + * 51728: Doc/Zsh/mod_pcre.yo, Src/Modules/pcre.c, Test/V07pcre.ztst: assign pcre named capture groups to a hash diff --git a/Doc/Zsh/mod_pcre.yo b/Doc/Zsh/mod_pcre.yo index 6d073985d..da73ac85a 100644 --- a/Doc/Zsh/mod_pcre.yo +++ b/Doc/Zsh/mod_pcre.yo @@ -25,7 +25,7 @@ may result in faster matching. ) findex(pcre_match) item(tt(pcre_match) [ tt(-v) var(var) ] [ tt(-a) var(arr) ] \ -[ tt(-A) var(assoc) ] [ tt(-n) var(offset) ] [ tt(-b) ] var(string))( +[ tt(-A) var(assoc) ] [ tt(-n) var(offset) ] [ tt(-bd) ] var(string))( Returns successfully if tt(string) matches the previously-compiled PCRE. @@ -69,6 +69,10 @@ print -l $accum) ) enditem() +The option tt(-d) uses the alternative breadth-first DFA search algorithm of +pcre. This sets tt(match), or the array given with tt(-a), to all the matches +found from the same start point in the subject. + The tt(zsh/pcre) module makes available the following test condition: startitem() diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c index 6be1f76e2..96f3c6e65 100644 --- a/Src/Modules/pcre.c +++ b/Src/Modules/pcre.c @@ -305,30 +305,29 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) pcre2_match_data *pcre_mdata = NULL; char *matched_portion = NULL; char *plaintext = NULL; - char *receptacle = NULL; - char *named = ".pcre.match"; + char *receptacle; + char *named = NULL; int return_value = 1; /* The subject length and offset start are both int values in pcre_exec */ int subject_len; int offset_start = 0; int want_offset_pair = 0; + int use_dfa = 0; if (pcre_pattern == NULL) { zwarnnam(nam, "no pattern has been compiled"); return 1; } - matched_portion = "MATCH"; - receptacle = "match"; - if(OPT_HASARG(ops,c='a')) { - receptacle = OPT_ARG(ops,c); - } - if(OPT_HASARG(ops,c='v')) { - matched_portion = OPT_ARG(ops,c); - } - if (OPT_HASARG(ops, c='A')) { - named = OPT_ARG(ops, c); + if (!(use_dfa = OPT_ISSET(ops, 'd'))) { + matched_portion = OPT_HASARG(ops, c='v') ? OPT_ARG(ops, c) : "MATCH"; + named = OPT_HASARG(ops, c='A') ? OPT_ARG(ops, c) : ".pcre.match"; + } else if (OPT_HASARG(ops, c='v') || OPT_HASARG(ops, c='A')) { + zwarnnam(nam, "-d cannot be combined with -%c", c); + return 1; } + receptacle = OPT_HASARG(ops, 'a') ? OPT_ARG(ops, 'a') : "match"; + if(OPT_HASARG(ops,c='n')) { /* The offset position to start the search, in bytes. */ if ((offset_start = getposint(OPT_ARG(ops,c), nam)) < 0) return 1; @@ -341,7 +340,25 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) if (offset_start > 0 && offset_start >= subject_len) ret = PCRE2_ERROR_NOMATCH; - else { + else if (use_dfa) { + PCRE2_SIZE old, wscount = 128, capcount = 128; + void *workspace = zhalloc(sizeof(int) * wscount); + pcre_mdata = pcre2_match_data_create(capcount, NULL); + do { + ret = pcre2_dfa_match(pcre_pattern, (PCRE2_SPTR) plaintext, subject_len, + offset_start, 0, pcre_mdata, NULL, (int *) workspace, wscount); + if (ret == PCRE2_ERROR_DFA_WSSIZE) { + old = wscount; + wscount += wscount / 2; + workspace = hrealloc(workspace, sizeof(int) * old, sizeof(int) * wscount); + } else if (ret == 0) { + capcount += capcount / 2; + pcre2_match_data_free(pcre_mdata); + pcre_mdata = pcre2_match_data_create(capcount, NULL); + } else + break; + } while(1); + } else { pcre_mdata = pcre2_match_data_create_from_pattern(pcre_pattern, NULL); ret = pcre2_match(pcre_pattern, (PCRE2_SPTR) plaintext, subject_len, offset_start, 0, pcre_mdata, NULL); @@ -350,12 +367,14 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) if (ret==0) return_value = 0; else if (ret == PCRE2_ERROR_NOMATCH) /* no match */; else if (ret>0) { - zpcre_get_substrings(pcre_pattern, plaintext, pcre_mdata, ret, matched_portion, - receptacle, named, want_offset_pair, 0, 0); + zpcre_get_substrings(pcre_pattern, plaintext, pcre_mdata, ret, + matched_portion, receptacle, named, want_offset_pair, use_dfa, 0); return_value = 0; } else { - zwarnnam(nam, "error in pcre2_match [%d]", ret); + PCRE2_UCHAR buffer[256]; + pcre2_get_error_message(ret, buffer, sizeof(buffer)); + zwarnnam(nam, "error in pcre matching for /%s/: %s", plaintext, buffer); } if (pcre_mdata) @@ -466,7 +485,7 @@ static struct conddef cotab[] = { static struct builtin bintab[] = { BUILTIN("pcre_compile", 0, bin_pcre_compile, 1, 1, 0, "aimxs", NULL), - BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "A:a:v:n:b", NULL), + BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "A:a:v:n:bd", NULL), BUILTIN("pcre_study", 0, bin_pcre_study, 0, 0, 0, NULL, NULL) }; diff --git a/Test/V07pcre.ztst b/Test/V07pcre.ztst index 027fea3aa..585698d05 100644 --- a/Test/V07pcre.ztst +++ b/Test/V07pcre.ztst @@ -196,3 +196,8 @@ > [package]=name-12345 > [version]=12345 >) + + pcre_compile 'cat(er(pillar)?)?' + pcre_match -d 'the caterpillar catchment' && print $match +0:pcre_match -d +>caterpillar cater cat -- cgit v1.2.3 From 9f57ca4ac8ae071727b1d77cbb8c4c0d893b9099 Mon Sep 17 00:00:00 2001 From: Oliver Kiddle Date: Wed, 1 Nov 2023 00:34:29 +0100 Subject: 52252: Coverity defect 1547827 --- ChangeLog | 2 ++ Src/Modules/pcre.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'Src/Modules/pcre.c') diff --git a/ChangeLog b/ChangeLog index 96d8cab8a..10b7ab867 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,7 @@ 2023-11-01 Oliver Kiddle + * 52252: Src/Modules/pcre.c: Coverity defect 1547827 + * Shohei YOSHIDA: 52179: Completion/Unix/Command/_gradle: Fix gradlew completion when it isn't in PATH diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c index 96f3c6e65..f5cda6d38 100644 --- a/Src/Modules/pcre.c +++ b/Src/Modules/pcre.c @@ -187,7 +187,8 @@ zpcre_get_substrings(pcre2_code *pat, char *arg, pcre2_match_data *mdata, setaparam(substravar, matches); } - if (!pcre2_pattern_info(pat, PCRE2_INFO_NAMECOUNT, &ncount) && ncount + if (namedassoc + && !pcre2_pattern_info(pat, PCRE2_INFO_NAMECOUNT, &ncount) && ncount && !pcre2_pattern_info(pat, PCRE2_INFO_NAMEENTRYSIZE, &nsize) && !pcre2_pattern_info(pat, PCRE2_INFO_NAMETABLE, &ntable)) { -- cgit v1.2.3 From de635b4ee56c188ccbaf0009027f9d1c0d42af0f Mon Sep 17 00:00:00 2001 From: Oliver Kiddle Date: Tue, 31 Oct 2023 01:04:19 +0100 Subject: 52253: support pcre callouts with shell evaluation of the callout string --- ChangeLog | 3 +++ Doc/Zsh/mod_pcre.yo | 5 +++++ Src/Modules/pcre.c | 34 ++++++++++++++++++++++++++++++++-- 3 files changed, 40 insertions(+), 2 deletions(-) (limited to 'Src/Modules/pcre.c') diff --git a/ChangeLog b/ChangeLog index 719222048..3f1014db3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ 2023-11-02 Oliver Kiddle + * 52253: Src/Modules/pcre.c: support pcre callouts with shell + evaluation of the callout string + * 52260: Completion/Unix/Command/_sudo: handle variable assignments before the command in sudo completion diff --git a/Doc/Zsh/mod_pcre.yo b/Doc/Zsh/mod_pcre.yo index da73ac85a..41fab4475 100644 --- a/Doc/Zsh/mod_pcre.yo +++ b/Doc/Zsh/mod_pcre.yo @@ -69,6 +69,11 @@ print -l $accum) ) enditem() +If the regular expression contains callouts, these are executed as shell code. +During the execution of the callout, the string the regular expression is +matching against is available in the parameter tt(.pcre.subject). If there is a +non-zero return status from the shell code, the callout does not match. + The option tt(-d) uses the alternative breadth-first DFA search algorithm of pcre. This sets tt(match), or the array given with tt(-a), to all the matches found from the same start point in the subject. diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c index f5cda6d38..e6b59831f 100644 --- a/Src/Modules/pcre.c +++ b/Src/Modules/pcre.c @@ -128,6 +128,31 @@ bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int f return 0; } +static int +pcre_callout(pcre2_callout_block_8 *block, void *) +{ + Eprog prog; + int ret=0; + + if (!block->callout_number && + ((prog = parse_string((char *) block->callout_string, 0)))) + { + int ef = errflag, lv = lastval; + + setsparam(".pcre.subject", + metafy((char *) block->subject, block->subject_length, META_DUP)); + setiparam(".pcre.pos", block->current_position + 1); + execode(prog, 1, 0, "pcre"); + ret = lastval | errflag; + + /* Restore any user interrupt error status */ + errflag = ef | (errflag & ERRFLAG_INT); + lastval = lv; + } + + return ret; +} + static int zpcre_get_substrings(pcre2_code *pat, char *arg, pcre2_match_data *mdata, int captured_count, char *matchvar, char *substravar, char *namedassoc, @@ -339,6 +364,9 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) plaintext = ztrdup(*args); unmetafy(plaintext, &subject_len); + pcre2_match_context_8 *mcontext = pcre2_match_context_create(NULL); + pcre2_set_callout(mcontext, &pcre_callout, 0); + if (offset_start > 0 && offset_start >= subject_len) ret = PCRE2_ERROR_NOMATCH; else if (use_dfa) { @@ -347,7 +375,7 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) pcre_mdata = pcre2_match_data_create(capcount, NULL); do { ret = pcre2_dfa_match(pcre_pattern, (PCRE2_SPTR) plaintext, subject_len, - offset_start, 0, pcre_mdata, NULL, (int *) workspace, wscount); + offset_start, 0, pcre_mdata, mcontext, (int *) workspace, wscount); if (ret == PCRE2_ERROR_DFA_WSSIZE) { old = wscount; wscount += wscount / 2; @@ -362,7 +390,7 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) } else { pcre_mdata = pcre2_match_data_create_from_pattern(pcre_pattern, NULL); ret = pcre2_match(pcre_pattern, (PCRE2_SPTR) plaintext, subject_len, - offset_start, 0, pcre_mdata, NULL); + offset_start, 0, pcre_mdata, mcontext); } if (ret==0) return_value = 0; @@ -380,6 +408,8 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) if (pcre_mdata) pcre2_match_data_free(pcre_mdata); + if (mcontext) + pcre2_match_context_free(mcontext); zsfree(plaintext); return return_value; -- cgit v1.2.3 From bad8af1808e3809285fbb33a5a168f09247c4793 Mon Sep 17 00:00:00 2001 From: Oliver Kiddle Date: Wed, 8 Nov 2023 02:32:23 +0100 Subject: 52271: use correct form for unused parameter --- ChangeLog | 4 ++++ Src/Modules/pcre.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'Src/Modules/pcre.c') diff --git a/ChangeLog b/ChangeLog index 6c36d838e..0997ef5c7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2023-11-08 Oliver Kiddle + + * 52271: Src/Modules/pcre.c: use correct form for unused parameter + 2023-11-02 Oliver Kiddle * 52268: Completion/Linux/Command/_networkmanager: update to 1.42.2 diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c index e6b59831f..e48ae3ae5 100644 --- a/Src/Modules/pcre.c +++ b/Src/Modules/pcre.c @@ -129,7 +129,7 @@ bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int f } static int -pcre_callout(pcre2_callout_block_8 *block, void *) +pcre_callout(pcre2_callout_block_8 *block, UNUSED(void *callout_data)) { Eprog prog; int ret=0; -- cgit v1.2.3 From 698af7bc1387462c8e87767d7eaeb7e30c6f0b2b Mon Sep 17 00:00:00 2001 From: Oliver Kiddle Date: Fri, 26 Jan 2024 07:33:38 +0100 Subject: 52405, 52502: add empty elements to $match for optional captures that don't match --- ChangeLog | 3 +++ Src/Modules/pcre.c | 5 ++++- Test/V07pcre.ztst | 5 +++++ 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'Src/Modules/pcre.c') diff --git a/ChangeLog b/ChangeLog index 280eac2de..e73320081 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ 2024-01-26 Oliver Kiddle + * 52405, 52502: Src/Modules/pcre.c, Test/V07pcre.ztst: + add empty elements to $match for optional captures that don't match + * github #110: opensauce04: Completion/Redhat/Command/_dnf: Fix incorrect completion for `dnf --showduplicates` diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c index e48ae3ae5..a49d1a307 100644 --- a/Src/Modules/pcre.c +++ b/Src/Modules/pcre.c @@ -391,6 +391,8 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) pcre_mdata = pcre2_match_data_create_from_pattern(pcre_pattern, NULL); ret = pcre2_match(pcre_pattern, (PCRE2_SPTR) plaintext, subject_len, offset_start, 0, pcre_mdata, mcontext); + if (ret > 0) + ret = pcre2_get_ovector_count(pcre_mdata); } if (ret==0) return_value = 0; @@ -479,7 +481,8 @@ cond_pcre_match(char **a, int id) break; } else if (r>0) { - zpcre_get_substrings(pcre_pat, lhstr_plain, pcre_mdata, r, svar, avar, + uint32_t ovec_count = pcre2_get_ovector_count(pcre_mdata); + zpcre_get_substrings(pcre_pat, lhstr_plain, pcre_mdata, ovec_count, svar, avar, ".pcre.match", 0, isset(BASHREMATCH), !isset(BASHREMATCH)); return_value = 1; break; diff --git a/Test/V07pcre.ztst b/Test/V07pcre.ztst index 585698d05..b8cd31c96 100644 --- a/Test/V07pcre.ztst +++ b/Test/V07pcre.ztst @@ -108,6 +108,11 @@ >0 xo→t →t >0 Xo→t →t + [[ foo =~ (pre)?f(o*)(opt(i)onal)?(y)* ]] + typeset -p match +0:Empty string for optional captures that don't match +>typeset -g -a match=( '' oo '' '' '' ) + string="The following zip codes: 78884 90210 99513" pcre_compile -m "\d{5}" pcre_match -b -- $string && print "$MATCH; ZPCRE_OP: $ZPCRE_OP" -- cgit v1.2.3 From 47c7bc9b1493c7374f076b5471cfd57ee30f4ba5 Mon Sep 17 00:00:00 2001 From: Stephane Chazelas Date: Sat, 9 Mar 2024 11:45:46 +0000 Subject: 52721: fix metafication and regexp/subject confusion in pcre_match error message --- ChangeLog | 5 +++++ Src/Modules/pcre.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'Src/Modules/pcre.c') diff --git a/ChangeLog b/ChangeLog index ef6c9f02d..620c74fcf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2024-03-09 Stephane Chazelas + + * 52721: fix metafication and regexp/subject confusion in + pcre_match error message. + 2024-03-08 Stephane Chazelas * 52704: Doc/Zsh/params.yo, mention new ${ ... } and ${|...} diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c index a49d1a307..67157cc01 100644 --- a/Src/Modules/pcre.c +++ b/Src/Modules/pcre.c @@ -405,7 +405,7 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) else { PCRE2_UCHAR buffer[256]; pcre2_get_error_message(ret, buffer, sizeof(buffer)); - zwarnnam(nam, "error in pcre matching for /%s/: %s", plaintext, buffer); + zwarnnam(nam, "error in pcre matching for %s: %s", *args, buffer); } if (pcre_mdata) -- cgit v1.2.3