summary refs log tree commit diff
path: root/Src/Modules/pcre.c
diff options
context:
space:
mode:
Diffstat (limited to 'Src/Modules/pcre.c')
-rw-r--r-- Src/Modules/pcre.c 327
1 files changed, 174 insertions, 153 deletions
diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c
index 6289e003e..67157cc01 100644
--- a/Src/Modules/pcre.c
+++ b/Src/Modules/pcre.c
@@ -34,11 +34,11 @@
#define CPCRE_PLAIN 0
/**/
-#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC)
-#include <pcre.h>
+#if defined(HAVE_PCRE2_COMPILE_8) && defined(HAVE_PCRE2_H)
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
-static pcre *pcre_pattern;
-static pcre_extra *pcre_hints;
+static pcre2_code *pcre_pattern;
/**/
static int
@@ -47,8 +47,6 @@ zpcre_utf8_enabled(void)
#if defined(MULTIBYTE_SUPPORT) && defined(HAVE_NL_LANGINFO) && defined(CODESET)
static int have_utf8_pcre = -1;
- /* value can toggle based on MULTIBYTE, so don't
- * be too eager with caching */
if (have_utf8_pcre < -1)
return 0;
@@ -56,15 +54,11 @@ zpcre_utf8_enabled(void)
return 0;
if ((have_utf8_pcre == -1) &&
- (!strcmp(nl_langinfo(CODESET), "UTF-8"))) {
-
- if (pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre))
- have_utf8_pcre = -2; /* erk, failed to ask */
+ (pcre2_config(PCRE2_CONFIG_UNICODE, &have_utf8_pcre))) {
+ have_utf8_pcre = -2; /* erk, failed to ask */
}
- if (have_utf8_pcre < 0)
- return 0;
- return have_utf8_pcre;
+ return (have_utf8_pcre == 1) && (!strcmp(nl_langinfo(CODESET), "UTF-8"));
#else
return 0;
@@ -75,47 +69,38 @@ zpcre_utf8_enabled(void)
static int
bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func))
{
- int pcre_opts = 0, pcre_errptr, target_len;
- const char *pcre_error;
+ uint32_t pcre_opts = 0;
+ int target_len;
+ int pcre_error;
+ PCRE2_SIZE pcre_offset;
char *target;
- if(OPT_ISSET(ops,'a')) pcre_opts |= PCRE_ANCHORED;
- if(OPT_ISSET(ops,'i')) pcre_opts |= PCRE_CASELESS;
- if(OPT_ISSET(ops,'m')) pcre_opts |= PCRE_MULTILINE;
- if(OPT_ISSET(ops,'x')) pcre_opts |= PCRE_EXTENDED;
- if(OPT_ISSET(ops,'s')) pcre_opts |= PCRE_DOTALL;
+ if (OPT_ISSET(ops, 'a')) pcre_opts |= PCRE2_ANCHORED;
+ if (OPT_ISSET(ops, 'i')) pcre_opts |= PCRE2_CASELESS;
+ if (OPT_ISSET(ops, 'm')) pcre_opts |= PCRE2_MULTILINE;
+ if (OPT_ISSET(ops, 'x')) pcre_opts |= PCRE2_EXTENDED;
+ if (OPT_ISSET(ops, 's')) pcre_opts |= PCRE2_DOTALL;
if (zpcre_utf8_enabled())
- pcre_opts |= PCRE_UTF8;
-
-#ifdef HAVE_PCRE_STUDY
- if (pcre_hints)
-#ifdef PCRE_CONFIG_JIT
- pcre_free_study(pcre_hints);
-#else
- pcre_free(pcre_hints);
-#endif
- pcre_hints = NULL;
-#endif
+ pcre_opts |= PCRE2_UTF;
if (pcre_pattern)
- pcre_free(pcre_pattern);
+ pcre2_code_free(pcre_pattern);
pcre_pattern = NULL;
target = ztrdup(*args);
unmetafy(target, &target_len);
- if ((int)strlen(target) != target_len) {
- zwarnnam(nam, "embedded NULs in PCRE pattern terminate pattern");
- }
-
- pcre_pattern = pcre_compile(target, pcre_opts, &pcre_error, &pcre_errptr, NULL);
+ pcre_pattern = pcre2_compile((PCRE2_SPTR) target, (PCRE2_SIZE) target_len,
+ pcre_opts, &pcre_error, &pcre_offset, NULL);
free(target);
if (pcre_pattern == NULL)
{
- zwarnnam(nam, "error in regex: %s", pcre_error);
+ PCRE2_UCHAR buffer[256];
+ pcre2_get_error_message(pcre_error, buffer, sizeof(buffer));
+ zwarnnam(nam, "error in regex: %s", buffer);
return 1;
}
@@ -123,67 +108,76 @@ bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func))
}
/**/
-#ifdef HAVE_PCRE_STUDY
-
-/**/
static int
bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int func))
{
- const char *pcre_error;
-
if (pcre_pattern == NULL)
{
zwarnnam(nam, "no pattern has been compiled for study");
return 1;
}
-
- if (pcre_hints)
-#ifdef PCRE_CONFIG_JIT
- pcre_free_study(pcre_hints);
-#else
- pcre_free(pcre_hints);
-#endif
- pcre_hints = NULL;
- pcre_hints = pcre_study(pcre_pattern, 0, &pcre_error);
- if (pcre_error != NULL)
- {
- zwarnnam(nam, "error while studying regex: %s", pcre_error);
- return 1;
+ int jit = 0;
+ if (!pcre2_config(PCRE2_CONFIG_JIT, &jit) && jit) {
+ if (pcre2_jit_compile(pcre_pattern, PCRE2_JIT_COMPLETE) < 0) {
+ zwarnnam(nam, "error while studying regex");
+ return 1;
+ }
}
return 0;
}
-/**/
-#else /* !HAVE_PCRE_STUDY */
+static int
+pcre_callout(pcre2_callout_block_8 *block, UNUSED(void *callout_data))
+{
+ Eprog prog;
+ int ret=0;
-# define bin_pcre_study bin_notavail
+ if (!block->callout_number &&
+ ((prog = parse_string((char *) block->callout_string, 0))))
+ {
+ int ef = errflag, lv = lastval;
-/**/
-#endif /* !HAVE_PCRE_STUDY */
+ setsparam(".pcre.subject",
+ metafy((char *) block->subject, block->subject_length, META_DUP));
+ setiparam(".pcre.pos", block->current_position + 1);
+ execode(prog, 1, 0, "pcre");
+ ret = lastval | errflag;
+
+ /* Restore any user interrupt error status */
+ errflag = ef | (errflag & ERRFLAG_INT);
+ lastval = lv;
+ }
+
+ return ret;
+}
-/**/
static int
-zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar,
- char *substravar, int want_offset_pair, int matchedinarr,
- int want_begin_end)
+zpcre_get_substrings(pcre2_code *pat, char *arg, pcre2_match_data *mdata,
+ int captured_count, char *matchvar, char *substravar, char *namedassoc,
+ int want_offset_pair, int matchedinarr, int want_begin_end)
{
- char **captures, *match_all, **matches;
+ PCRE2_SIZE *ovec;
+ char *match_all, **matches;
char offset_all[50];
int capture_start = 1;
+ int vec_off;
+ PCRE2_SPTR ntable; /* table of named captures */
+ uint32_t ncount, nsize;
if (matchedinarr) {
- /* bash-style captures[0] entire-matched string in the array */
+ /* bash-style ovec[0] entire-matched string in the array */
capture_start = 0;
}
- /* captures[0] will be entire matched string, [1] first substring */
- if (!pcre_get_substring_list(arg, ovec, captured_count, (const char ***)&captures)) {
- int nelem = arrlen(captures)-1;
+ /* ovec[0] will be entire matched string, [1] first substring */
+ ovec = pcre2_get_ovector_pointer(mdata);
+ if (ovec) {
+ int nelem = captured_count - 1;
/* Set to the offsets of the complete match */
if (want_offset_pair) {
- sprintf(offset_all, "%d %d", ovec[0], ovec[1]);
+ sprintf(offset_all, "%ld %ld", ovec[0], ovec[1]);
setsparam("ZPCRE_OP", ztrdup(offset_all));
}
/*
@@ -192,7 +186,7 @@ zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar,
* ovec is length 2*(1+capture_list_length)
*/
if (matchvar) {
- match_all = metafy(captures[0], ovec[1] - ovec[0], META_DUP);
+ match_all = metafy(arg + ovec[0], ovec[1] - ovec[0], META_DUP);
setsparam(matchvar, match_all);
}
/*
@@ -207,21 +201,35 @@ zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar,
*/
if (substravar &&
(!want_begin_end || nelem)) {
- char **x, **y;
- int vec_off, i;
- y = &captures[capture_start];
+ char **x;
+ int i;
matches = x = (char **) zalloc(sizeof(char *) * (captured_count+1-capture_start));
- for (i = capture_start; i < captured_count; i++, y++) {
+ for (i = capture_start; i < captured_count; i++) {
vec_off = 2*i;
- if (*y)
- *x++ = metafy(*y, ovec[vec_off+1]-ovec[vec_off], META_DUP);
- else
- *x++ = NULL;
+ *x++ = metafy(arg + ovec[vec_off], ovec[vec_off+1]-ovec[vec_off], META_DUP);
}
*x = NULL;
setaparam(substravar, matches);
}
+ if (namedassoc
+ && !pcre2_pattern_info(pat, PCRE2_INFO_NAMECOUNT, &ncount) && ncount
+ && !pcre2_pattern_info(pat, PCRE2_INFO_NAMEENTRYSIZE, &nsize)
+ && !pcre2_pattern_info(pat, PCRE2_INFO_NAMETABLE, &ntable))
+ {
+ char **hash, **hashptr;
+ uint32_t nidx;
+ hashptr = hash = (char **)zshcalloc((ncount+1)*2*sizeof(char *));
+ for (nidx = 0; nidx < ncount; nidx++) {
+ vec_off = (ntable[nsize * nidx] << 9) + 2 * ntable[nsize * nidx + 1];
+ /* would metafy the key but pcre limits characters in the name */
+ *hashptr++ = ztrdup((char *) ntable + nsize * nidx + 2);
+ *hashptr++ = metafy(arg + ovec[vec_off],
+ ovec[vec_off+1]-ovec[vec_off], META_DUP);
+ }
+ sethparam(namedassoc, hash);
+ }
+
if (want_begin_end) {
/*
* cond-infix rather than builtin; also not bash; so we set a bunch
@@ -253,7 +261,8 @@ zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar,
setiparam("MEND", offs + !isset(KSHARRAYS) - 1);
if (nelem) {
char **mbegin, **mend, **bptr, **eptr;
- int i, *ipair;
+ int i;
+ size_t *ipair;
bptr = mbegin = zalloc(sizeof(char*)*(nelem+1));
eptr = mend = zalloc(sizeof(char*)*(nelem+1));
@@ -293,8 +302,6 @@ zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar,
setaparam("mend", mend);
}
}
-
- pcre_free_substring_list((const char **)captures);
}
return 0;
@@ -320,29 +327,33 @@ getposint(char *instr, char *nam)
static int
bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
{
- int ret, capcount, *ovec, ovecsize, c;
+ int ret, c;
+ pcre2_match_data *pcre_mdata = NULL;
char *matched_portion = NULL;
char *plaintext = NULL;
- char *receptacle = NULL;
+ char *receptacle;
+ char *named = NULL;
int return_value = 1;
/* The subject length and offset start are both int values in pcre_exec */
int subject_len;
int offset_start = 0;
int want_offset_pair = 0;
+ int use_dfa = 0;
if (pcre_pattern == NULL) {
zwarnnam(nam, "no pattern has been compiled");
return 1;
}
- matched_portion = "MATCH";
- receptacle = "match";
- if(OPT_HASARG(ops,c='a')) {
- receptacle = OPT_ARG(ops,c);
- }
- if(OPT_HASARG(ops,c='v')) {
- matched_portion = OPT_ARG(ops,c);
+ if (!(use_dfa = OPT_ISSET(ops, 'd'))) {
+ matched_portion = OPT_HASARG(ops, c='v') ? OPT_ARG(ops, c) : "MATCH";
+ named = OPT_HASARG(ops, c='A') ? OPT_ARG(ops, c) : ".pcre.match";
+ } else if (OPT_HASARG(ops, c='v') || OPT_HASARG(ops, c='A')) {
+ zwarnnam(nam, "-d cannot be combined with -%c", c);
+ return 1;
}
+ receptacle = OPT_HASARG(ops, 'a') ? OPT_ARG(ops, 'a') : "match";
+
if(OPT_HASARG(ops,c='n')) { /* The offset position to start the search, in bytes. */
if ((offset_start = getposint(OPT_ARG(ops,c), nam)) < 0)
return 1;
@@ -350,36 +361,57 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
/* For the entire match, 'Return' the offset byte positions instead of the matched string */
if(OPT_ISSET(ops,'b')) want_offset_pair = 1;
- if ((ret = pcre_fullinfo(pcre_pattern, pcre_hints, PCRE_INFO_CAPTURECOUNT, &capcount)))
- {
- zwarnnam(nam, "error %d in fullinfo", ret);
- return 1;
- }
-
- ovecsize = (capcount+1)*3;
- ovec = zalloc(ovecsize*sizeof(int));
-
plaintext = ztrdup(*args);
unmetafy(plaintext, &subject_len);
+ pcre2_match_context_8 *mcontext = pcre2_match_context_create(NULL);
+ pcre2_set_callout(mcontext, &pcre_callout, 0);
+
if (offset_start > 0 && offset_start >= subject_len)
- ret = PCRE_ERROR_NOMATCH;
- else
- ret = pcre_exec(pcre_pattern, pcre_hints, plaintext, subject_len, offset_start, 0, ovec, ovecsize);
+ ret = PCRE2_ERROR_NOMATCH;
+ else if (use_dfa) {
+ PCRE2_SIZE old, wscount = 128, capcount = 128;
+ void *workspace = zhalloc(sizeof(int) * wscount);
+ pcre_mdata = pcre2_match_data_create(capcount, NULL);
+ do {
+ ret = pcre2_dfa_match(pcre_pattern, (PCRE2_SPTR) plaintext, subject_len,
+ offset_start, 0, pcre_mdata, mcontext, (int *) workspace, wscount);
+ if (ret == PCRE2_ERROR_DFA_WSSIZE) {
+ old = wscount;
+ wscount += wscount / 2;
+ workspace = hrealloc(workspace, sizeof(int) * old, sizeof(int) * wscount);
+ } else if (ret == 0) {
+ capcount += capcount / 2;
+ pcre2_match_data_free(pcre_mdata);
+ pcre_mdata = pcre2_match_data_create(capcount, NULL);
+ } else
+ break;
+ } while(1);
+ } else {
+ pcre_mdata = pcre2_match_data_create_from_pattern(pcre_pattern, NULL);
+ ret = pcre2_match(pcre_pattern, (PCRE2_SPTR) plaintext, subject_len,
+ offset_start, 0, pcre_mdata, mcontext);
+ if (ret > 0)
+ ret = pcre2_get_ovector_count(pcre_mdata);
+ }
if (ret==0) return_value = 0;
- else if (ret==PCRE_ERROR_NOMATCH) /* no match */;
+ else if (ret == PCRE2_ERROR_NOMATCH) /* no match */;
else if (ret>0) {
- zpcre_get_substrings(plaintext, ovec, ret, matched_portion, receptacle,
- want_offset_pair, 0, 0);
+ zpcre_get_substrings(pcre_pattern, plaintext, pcre_mdata, ret,
+ matched_portion, receptacle, named, want_offset_pair, use_dfa, 0);
return_value = 0;
}
else {
- zwarnnam(nam, "error in pcre_exec [%d]", ret);
+ PCRE2_UCHAR buffer[256];
+ pcre2_get_error_message(ret, buffer, sizeof(buffer));
+ zwarnnam(nam, "error in pcre matching for %s: %s", *args, buffer);
}
- if (ovec)
- zfree(ovec, ovecsize*sizeof(int));
+ if (pcre_mdata)
+ pcre2_match_data_free(pcre_mdata);
+ if (mcontext)
+ pcre2_match_context_free(mcontext);
zsfree(plaintext);
return return_value;
@@ -389,17 +421,19 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
static int
cond_pcre_match(char **a, int id)
{
- pcre *pcre_pat;
- const char *pcre_err;
+ pcre2_code *pcre_pat = NULL;
+ int pcre_err;
+ PCRE2_SIZE pcre_erroff;
char *lhstr, *rhre, *lhstr_plain, *rhre_plain, *avar, *svar;
- int r = 0, pcre_opts = 0, pcre_errptr, capcnt, *ov, ovsize;
+ int r = 0, pcre_opts = 0;
+ pcre2_match_data *pcre_mdata = NULL;
int lhstr_plain_len, rhre_plain_len;
int return_value = 0;
if (zpcre_utf8_enabled())
- pcre_opts |= PCRE_UTF8;
+ pcre_opts |= PCRE2_UTF;
if (isset(REMATCHPCRE) && !isset(CASEMATCH))
- pcre_opts |= PCRE_CASELESS;
+ pcre_opts |= PCRE2_CASELESS;
lhstr = cond_str(a,0,0);
rhre = cond_str(a,1,0);
@@ -407,9 +441,6 @@ cond_pcre_match(char **a, int id)
rhre_plain = ztrdup(rhre);
unmetafy(lhstr_plain, &lhstr_plain_len);
unmetafy(rhre_plain, &rhre_plain_len);
- pcre_pat = NULL;
- ov = NULL;
- ovsize = 0;
if (isset(BASHREMATCH)) {
svar = NULL;
@@ -421,27 +452,27 @@ cond_pcre_match(char **a, int id)
switch(id) {
case CPCRE_PLAIN:
- if ((int)strlen(rhre_plain) != rhre_plain_len) {
- zwarn("embedded NULs in PCRE pattern terminate pattern");
- }
- pcre_pat = pcre_compile(rhre_plain, pcre_opts, &pcre_err, &pcre_errptr, NULL);
- if (pcre_pat == NULL) {
- zwarn("failed to compile regexp /%s/: %s", rhre, pcre_err);
+ if (!(pcre_pat = pcre2_compile((PCRE2_SPTR) rhre_plain,
+ (PCRE2_SIZE) rhre_plain_len, pcre_opts,
+ &pcre_err, &pcre_erroff, NULL)))
+ {
+ PCRE2_UCHAR buffer[256];
+ pcre2_get_error_message(pcre_err, buffer, sizeof(buffer));
+ zwarn("failed to compile regexp /%s/: %s", rhre, buffer);
break;
}
- pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt);
- ovsize = (capcnt+1)*3;
- ov = zalloc(ovsize*sizeof(int));
- r = pcre_exec(pcre_pat, NULL, lhstr_plain, lhstr_plain_len, 0, 0, ov, ovsize);
- /* r < 0 => error; r==0 match but not enough size in ov
+ pcre_mdata = pcre2_match_data_create_from_pattern(pcre_pat, NULL);
+ r = pcre2_match(pcre_pat, (PCRE2_SPTR8) lhstr_plain, lhstr_plain_len,
+ 0, 0, pcre_mdata, NULL);
+ /* r < 0 => error; r==0 match but not enough size in match data
* r > 0 => (r-1) substrings found; r==1 => no substrings
*/
if (r==0) {
- zwarn("reportable zsh problem: pcre_exec() returned 0");
+ zwarn("reportable zsh problem: pcre2_match() returned 0");
return_value = 1;
break;
}
- else if (r==PCRE_ERROR_NOMATCH) {
+ else if (r == PCRE2_ERROR_NOMATCH) {
return_value = 0; /* no match */
break;
}
@@ -450,9 +481,9 @@ cond_pcre_match(char **a, int id)
break;
}
else if (r>0) {
- zpcre_get_substrings(lhstr_plain, ov, r, svar, avar, 0,
- isset(BASHREMATCH),
- !isset(BASHREMATCH));
+ uint32_t ovec_count = pcre2_get_ovector_count(pcre_mdata);
+ zpcre_get_substrings(pcre_pat, lhstr_plain, pcre_mdata, ovec_count, svar, avar,
+ ".pcre.match", 0, isset(BASHREMATCH), !isset(BASHREMATCH));
return_value = 1;
break;
}
@@ -463,10 +494,10 @@ cond_pcre_match(char **a, int id)
free(lhstr_plain);
if(rhre_plain)
free(rhre_plain);
+ if (pcre_mdata)
+ pcre2_match_data_free(pcre_mdata);
if (pcre_pat)
- pcre_free(pcre_pat);
- if (ov)
- zfree(ov, ovsize*sizeof(int));
+ pcre2_code_free(pcre_pat);
return return_value;
}
@@ -488,18 +519,18 @@ static struct conddef cotab[] = {
static struct builtin bintab[] = {
BUILTIN("pcre_compile", 0, bin_pcre_compile, 1, 1, 0, "aimxs", NULL),
- BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "a:v:n:b", NULL),
+ BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "A:a:v:n:bd", NULL),
BUILTIN("pcre_study", 0, bin_pcre_study, 0, 0, 0, NULL, NULL)
};
static struct features module_features = {
bintab, sizeof(bintab)/sizeof(*bintab),
-#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC)
+#if defined(HAVE_PCRE2_COMPILE_8) && defined(HAVE_PCRE2_H)
cotab, sizeof(cotab)/sizeof(*cotab),
-#else /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */
+#else /* !(HAVE_PCRE2_COMPILE_8 && HAVE_PCRE2_H) */
NULL, 0,
-#endif /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */
+#endif /* !(HAVE_PCRE2_COMPILE_8 && HAVE_PCRE2_H) */
NULL, 0,
NULL, 0,
0
@@ -546,19 +577,9 @@ cleanup_(Module m)
int
finish_(UNUSED(Module m))
{
-#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC)
-#ifdef HAVE_PCRE_STUDY
- if (pcre_hints)
-#ifdef PCRE_CONFIG_JIT
- pcre_free_study(pcre_hints);
-#else
- pcre_free(pcre_hints);
-#endif
- pcre_hints = NULL;
-#endif
-
+#if defined(HAVE_PCRE2_COMPILE_8) && defined(HAVE_PCRE2_H)
if (pcre_pattern)
- pcre_free(pcre_pattern);
+ pcre2_code_free(pcre_pattern);
pcre_pattern = NULL;
#endif