summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 3
-rw-r--r-- Src/Modules/pcre.c 33
-rw-r--r-- Test/V07pcre.ztst 11
3 files changed, 37 insertions, 10 deletions
diff --git a/ChangeLog b/ChangeLog
index dc091bf0a..d666f21ce 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
2016-01-08 Barton E. Schaefer <schaefer@zsh.org>
+ * Jun T.: 37515: Src/Modules/pcre.c, Test/V07pcre.ztst: multibyte
+ handling as per 35448.
+
* unposted (cf. Jun T.: 37516): Src/builtin.c: refine READ_MSTREAM
to avoid unsequenced evaluation
diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c
index 2393cd1e7..aa5c8ed5b 100644
--- a/Src/Modules/pcre.c
+++ b/Src/Modules/pcre.c
@@ -190,18 +190,25 @@ zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar,
if (want_begin_end) {
char *ptr = arg;
zlong offs = 0;
+ int clen, leftlen;
/* Count the characters before the match */
- MB_METACHARINIT();
- while (ptr < arg + ovec[0]) {
+ MB_CHARINIT();
+ leftlen = ovec[0];
+ while (leftlen) {
offs++;
- ptr += MB_METACHARLEN(ptr);
+ clen = MB_CHARLEN(ptr, leftlen);
+ ptr += clen;
+ leftlen -= clen;
}
setiparam("MBEGIN", offs + !isset(KSHARRAYS));
/* Add on the characters in the match */
- while (ptr < arg + ovec[1]) {
+ leftlen = ovec[1] - ovec[0];
+ while (leftlen) {
offs++;
- ptr += MB_METACHARLEN(ptr);
+ clen = MB_CHARLEN(ptr, leftlen);
+ ptr += clen;
+ leftlen -= clen;
}
setiparam("MEND", offs + !isset(KSHARRAYS) - 1);
if (nelem) {
@@ -219,17 +226,23 @@ zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar,
ptr = arg;
offs = 0;
/* Find the start offset */
- MB_METACHARINIT();
- while (ptr < arg + ipair[0]) {
+ MB_CHARINIT();
+ leftlen = ipair[0];
+ while (leftlen) {
offs++;
- ptr += MB_METACHARLEN(ptr);
+ clen = MB_CHARLEN(ptr, leftlen);
+ ptr += clen;
+ leftlen -= clen;
}
convbase(buf, offs + !isset(KSHARRAYS), 10);
*bptr = ztrdup(buf);
/* Continue to the end offset */
- while (ptr < arg + ipair[1]) {
+ leftlen = ipair[1] - ipair[0];
+ while (leftlen) {
offs++;
- ptr += MB_METACHARLEN(ptr);
+ clen = MB_CHARLEN(ptr, leftlen);
+ ptr += clen;
+ leftlen -= clen;
}
convbase(buf, offs + !isset(KSHARRAYS) - 1, 10);
*eptr = ztrdup(buf);
diff --git a/Test/V07pcre.ztst b/Test/V07pcre.ztst
index ddfd3f5cd..39077564c 100644
--- a/Test/V07pcre.ztst
+++ b/Test/V07pcre.ztst
@@ -37,6 +37,17 @@
>o→b
>→
+ unset match mend
+ s=$'\u00a0'
+ [[ $s =~ '^.$' ]] && print OK
+ [[ A${s}B =~ .(.). && $match[1] == $s ]] && print OK
+ [[ A${s}${s}B =~ A([^[:ascii:]]*)B && $mend[1] == 3 ]] && print OK
+ unset s
+0:Raw IMETA characters in input string
+>OK
+>OK
+>OK
+
[[ foo =~ f.+ ]] ; print $?
[[ foo =~ x.+ ]] ; print $?
[[ ! foo =~ f.+ ]] ; print $?