summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Stephenson <pws@users.sourceforge.net>2010-01-17 21:48:25 +0000
committerPeter Stephenson <pws@users.sourceforge.net>2010-01-17 21:48:25 +0000
commit8890e6e19ffa0bede585527671987972137009c7 (patch)
tree624d1830c56f33b2f3f1a3560306bad6336a4d32
parenta2c89eb701e5ec3ffca33f25673a9600a9735930 (diff)
downloadzsh-8890e6e19ffa0bede585527671987972137009c7.tar.gz
zsh-8890e6e19ffa0bede585527671987972137009c7.zip
27600: extend =~ syntax to set positional variables for matches
-rw-r--r--ChangeLog8
-rw-r--r--Doc/Zsh/cond.yo24
-rw-r--r--Src/Modules/pcre.c79
-rw-r--r--Src/Modules/regex.c56
-rw-r--r--Test/C02cond.ztst33
5 files changed, 188 insertions, 12 deletions
diff --git a/ChangeLog b/ChangeLog
index 35600b5e1..530decde1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2010-01-17 Peter Stephenson <p.w.stephenson@ntlworld.com>
+
+ * 27600: Doc/Zsh/cond.yo, Src/Modules/pcre.c, Src/Modules/regex.c,
+ Test/C02cond.ztst: extend =~ syntax to set MBEGIN and MEND
+ with MATCH and mbegin and mend with match.
+
2010-01-16 Peter Stephenson <p.w.stephenson@ntlworld.com>
* Frank: 27599: Doc/Zsh/contrib.yo: fix formatting documentation
@@ -12585,5 +12591,5 @@
*****************************************************
* This is used by the shell to define $ZSH_PATCHLEVEL
-* $Revision: 1.4862 $
+* $Revision: 1.4863 $
*****************************************************
diff --git a/Doc/Zsh/cond.yo b/Doc/Zsh/cond.yo
index 9d8f145f8..4b7304407 100644
--- a/Doc/Zsh/cond.yo
+++ b/Doc/Zsh/cond.yo
@@ -117,13 +117,29 @@ the tt(zsh/pcre) module, else it is tested as a POSIX
extended regular expression using the tt(zsh/regex) module.
Upon successful match, some variables will be updated; no variables
are changed if the matching fails.
+
+If the option tt(BASH_REMATCH) is not set the scalar parameter
+tt(MATCH) is set to the substring that matched the pattern and
+the integer parameters tt(MBEGIN) and tt(MEND) to the index of the start
+and end, respectively, of the match in var(string), such that if
+var(string) is contained in variable tt(var) the expression
+`${var[$MBEGIN,$MEND]}' is identical to `$MATCH'. The setting
+of the option tt(KSH_ARRAYS) is respected. Likewise, the array
+tt(match) is set to the substrings that matched parenthesised
+subexpressions and the arrays tt(mbegin) and tt(mend) to the indices of
+the start and end positions, respectively, of the substrings within
+var(string). The arrays are not set if there were no parenthesised
+subexpressions. For example, if the string `tt(a short string)' is matched
+against the regular expression `tt(s(...)t)', then (assuming the option
+tt(KSH_ARRAYS) is not set) tt(MATCH), tt(MBEGIN)
+and tt(MEND) are `tt(short)', 3 and 7, respectively, while tt(match),
+tt(mbegin) and tt(mend) are single entry arrays containing
+the strings `tt(hor)', `tt(4)' and `tt(6)', respectively.
+
If the option tt(BASH_REMATCH) is set the array
tt(BASH_REMATCH) is set to the substring that matched the pattern
followed by the substrings that matched parenthesised
-subexpressions within the pattern; otherwise, the scalar parameter
-tt(MATCH) is set to the substring that matched the pattern and
-and the array tt(match) to the substrings that matched parenthesised
-subexpressions.
+subexpressions within the pattern.
)
item(var(string1) tt(<) var(string2))(
true if var(string1) comes before var(string2)
diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c
index 08205d144..f8b79adea 100644
--- a/Src/Modules/pcre.c
+++ b/Src/Modules/pcre.c
@@ -138,8 +138,9 @@ bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int f
/**/
static int
-zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar, char *substravar,
- int want_offset_pair, int matchedinarr)
+zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar,
+ char *substravar, int want_offset_pair, int matchedinarr,
+ int want_begin_end)
{
char **captures, *match_all, **matches;
char offset_all[50];
@@ -154,6 +155,7 @@ zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar, char *substr
/* captures[0] will be entire matched string, [1] first substring */
if (!pcre_get_substring_list(arg, ovec, ret, (const char ***)&captures)) {
+ int nelem = arrlen(captures)-1;
/* Set to the offsets of the complete match */
if (want_offset_pair) {
sprintf(offset_all, "%d %d", ovec[0], ovec[1]);
@@ -161,8 +163,70 @@ zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar, char *substr
}
match_all = ztrdup(captures[0]);
setsparam(matchvar, match_all);
- matches = zarrdup(&captures[capture_start]);
- setaparam(substravar, matches);
+ /*
+ * If we're setting match, mbegin, mend we only do
+ * so if there were parenthesised matches, for consistency
+ * (c.f. regex.c).
+ */
+ if (!want_begin_end || nelem) {
+ matches = zarrdup(&captures[capture_start]);
+ setaparam(substravar, matches);
+ }
+
+ if (want_begin_end) {
+ char *ptr = arg;
+ zlong offs = 0;
+
+ /* Count the characters before the match */
+ MB_METACHARINIT();
+ while (ptr < arg + ovec[0]) {
+ offs++;
+ ptr += MB_METACHARLEN(ptr);
+ }
+ setiparam("MBEGIN", offs + !isset(KSHARRAYS));
+ /* Add on the characters in the match */
+ while (ptr < arg + ovec[1]) {
+ offs++;
+ ptr += MB_METACHARLEN(ptr);
+ }
+ setiparam("MEND", offs + !isset(KSHARRAYS) - 1);
+ if (nelem) {
+ char **mbegin, **mend, **bptr, **eptr;
+ int i, *ipair;
+
+ bptr = mbegin = zalloc(nelem+1);
+ eptr = mend = zalloc(nelem+1);
+
+ for (ipair = ovec + 2, i = 0;
+ i < nelem;
+ ipair += 2, i++, bptr++, eptr++)
+ {
+ char buf[DIGBUFSIZE];
+ ptr = arg;
+ offs = 0;
+ /* Find the start offset */
+ MB_METACHARINIT();
+ while (ptr < arg + ipair[0]) {
+ offs++;
+ ptr += MB_METACHARLEN(ptr);
+ }
+ convbase(buf, offs + !isset(KSHARRAYS), 10);
+ *bptr = ztrdup(buf);
+ /* Continue to the end offset */
+ while (ptr < arg + ipair[1]) {
+ offs++;
+ ptr += MB_METACHARLEN(ptr);
+ }
+ convbase(buf, offs + !isset(KSHARRAYS) - 1, 10);
+ *eptr = ztrdup(buf);
+ }
+ *bptr = *eptr = NULL;
+
+ setaparam("mbegin", mbegin);
+ setaparam("mend", mend);
+ }
+ }
+
pcre_free_substring_list((const char **)captures);
}
@@ -238,7 +302,8 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
if (ret==0) return_value = 0;
else if (ret==PCRE_ERROR_NOMATCH) /* no match */;
else if (ret>0) {
- zpcre_get_substrings(*args, ovec, ret, matched_portion, receptacle, want_offset_pair, 0);
+ zpcre_get_substrings(*args, ovec, ret, matched_portion, receptacle,
+ want_offset_pair, 0, 0);
return_value = 0;
}
else {
@@ -297,7 +362,9 @@ cond_pcre_match(char **a, int id)
break;
}
else if (r>0) {
- zpcre_get_substrings(lhstr, ov, r, NULL, avar, 0, isset(BASHREMATCH));
+ zpcre_get_substrings(lhstr, ov, r, NULL, avar, 0,
+ isset(BASHREMATCH),
+ !isset(BASHREMATCH));
return_value = 1;
break;
}
diff --git a/Src/Modules/regex.c b/Src/Modules/regex.c
index 8a9f3e608..25dbddf07 100644
--- a/Src/Modules/regex.c
+++ b/Src/Modules/regex.c
@@ -108,11 +108,65 @@ zcond_regex_match(char **a, int id)
if (isset(BASHREMATCH)) {
setaparam("BASH_REMATCH", arr);
} else {
+ zlong offs;
+ char *ptr;
+
m = matches;
s = ztrduppfx(lhstr + m->rm_so, m->rm_eo - m->rm_so);
setsparam("MATCH", s);
- if (nelem)
+ /*
+ * Count the characters before the match.
+ */
+ ptr = lhstr;
+ offs = 0;
+ MB_METACHARINIT();
+ while (ptr < lhstr + m->rm_so) {
+ offs++;
+ ptr += MB_METACHARLEN(ptr);
+ }
+ setiparam("MBEGIN", offs + !isset(KSHARRAYS));
+ /*
+ * Add on the characters in the match.
+ */
+ while (ptr < lhstr + m->rm_eo) {
+ offs++;
+ ptr += MB_METACHARLEN(ptr);
+ }
+ setiparam("MEND", offs + !isset(KSHARRAYS) - 1);
+ if (nelem) {
+ char **mbegin, **mend, **bptr, **eptr;
+ bptr = mbegin = (char **)zalloc(nelem+1);
+ eptr = mend = (char **)zalloc(nelem+1);
+
+ for (m = matches + start, n = start;
+ n <= (int)re.re_nsub;
+ ++n, ++m, ++bptr, ++eptr)
+ {
+ char buf[DIGBUFSIZE];
+ ptr = lhstr;
+ offs = 0;
+ /* Find the start offset */
+ MB_METACHARINIT();
+ while (ptr < lhstr + m->rm_so) {
+ offs++;
+ ptr += MB_METACHARLEN(ptr);
+ }
+ convbase(buf, offs + !isset(KSHARRAYS), 10);
+ *bptr = ztrdup(buf);
+ /* Continue to the end offset */
+ while (ptr < lhstr + m->rm_eo) {
+ offs++;
+ ptr += MB_METACHARLEN(ptr);
+ }
+ convbase(buf, offs + !isset(KSHARRAYS) - 1, 10);
+ *eptr = ztrdup(buf);
+ }
+ *bptr = *eptr = NULL;
+
setaparam("match", arr);
+ setaparam("mbegin", mbegin);
+ setaparam("mend", mend);
+ }
}
}
else
diff --git a/Test/C02cond.ztst b/Test/C02cond.ztst
index de82dcbe2..b0e278f4b 100644
--- a/Test/C02cond.ztst
+++ b/Test/C02cond.ztst
@@ -251,6 +251,39 @@ F:Failures in these cases do not indicate a problem in the shell.
fi
0:regex tests shouldn't crash
+ if zmodload -i zsh/regex 2>/dev/null; then
+ string="this has stuff in it"
+ bad_regex=0
+ if [[ $string =~ "h([a-z]*) s([a-z]*) " ]]; then
+ if [[ "$MATCH $MBEGIN $MEND" != "has stuff 6 15" ]]; then
+ print -r "regex variables MATCH MBEGIN MEND:
+ '$MATCH $MBEGIN $MEND'
+ should be:
+ 'has stuff 6 15'" >&2
+ bad_regex=1
+ else
+ results=("as 7 8" "tuff 11 14")
+ for i in 1 2; do
+ if [[ "$match[$i] $mbegin[$i] $mend[$i]" != $results[i] ]]; then
+ print -r "regex variables match[$i] mbegin[$i] mend[$i]:
+ '$match[$i] $mbegin[$i] $mend[$i]'
+ should be
+ '$results[$i]'" >&2
+ break
+ fi
+ done
+ fi
+ else
+ print -r "regex failed to match '$string'" >&2
+ fi
+ (( bad_regex )) || print OK
+ else
+ # if it didn't load, tough, but not a test error
+ print OK
+ fi
+0:MATCH, MBEGIN, MEND, match, mbegin, mend
+>OK
+
%clean
# This works around a bug in rm -f in some versions of Cygwin
chmod 644 unmodish