summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog6
-rw-r--r--README34
-rw-r--r--Src/glob.c19
-rw-r--r--Src/lex.c31
-rw-r--r--Src/pattern.c13
-rw-r--r--Src/zsh.h16
-rw-r--r--Test/D02glob.ztst33
7 files changed, 110 insertions, 42 deletions
diff --git a/ChangeLog b/ChangeLog
index 71acc1e64..4264932f6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2016-01-19 Peter Stephenson <p.stephenson@samsung.com>
+
+ * 37689: README, Src/glob.c, Src/lex.c, Src/pattern.c,
+ Src/zsh.h, Test/D02glob.ztst: also ! and ^ need to be tokenised
+ in character set.
+
2016-01-18 Daniel Shahaf <d.s@daniel.shahaf.name>
* 37678: Src/glob.c, Src/lex.c, Src/pattern.c, Src/utils.c,
diff --git a/README b/README
index 2e2ebce2b..6e5b73067 100644
--- a/README
+++ b/README
@@ -29,17 +29,43 @@ Zsh is a shell with lots of features. For a list of some of these, see the
file FEATURES, and for the latest changes see NEWS. For more
details, see the documentation.
-Incompatibilities between 5.1 and 5.2
+Incompatibilities between 5.2 and 5.3
-------------------------------------
+In character classes delimited by "[" and "]" within patterns, whether
+used for filename generation (globbing) or other forms of pattern
+matching, it used not to be possible to quote "-" when used for a range,
+or "^" and "!" when used for negating a character set. The characters can
+now be quoted by any of the standard shell means, but note that
+the "[" and "]" must not be quoted. For example,
+
+ [[ $a = ['a-z'] ]]
+
+matches only if the variable a consists of exactly one of the characters
+"a", "-" or "z". Previously this would have matched any lower case ASCII
+letter. Note therefore the useful fact that
+
+ [[ $a = ["$cset"] ]]
+
+matches any character contained in the variable "cset". A consequence
+of this change is that variables that should have active ranges need
+(with default zsh options) to be indicated explicitly, e.g.
+
+ cset="a-z"
+ [[ b = [${~cset}] ]]
+
+The "~" causes the "-" character to be active. In sh emulation the
+"~" is unnecessary in this example and double quotes must be used to
+suppress the range behaviour of the "-".
+
+Incompatibilities between 5.0.8 and 5.2
+---------------------------------------
+
The behaviour of the parameter flag (P) has changed when it appears
in a nested parameter group, in order to make it more useful in
such cases. A (P) in the outermost parameter group behaves as
before. See NEWS for more.
-Incompatibilities between 5.0.8 and 5.1
----------------------------------------
-
The default behaviour when text is pasted into an X Windows terminal has
changed significantly (unless you are using a very old terminal emulator
that doesn't support this mode). Now, the new "bracketed paste mode"
diff --git a/Src/glob.c b/Src/glob.c
index e5d8956e6..c7992813e 100644
--- a/Src/glob.c
+++ b/Src/glob.c
@@ -3476,7 +3476,7 @@ static void
zshtokenize(char *s, int flags)
{
char *t;
- int bslash = 0, seen_brct = 0;
+ int bslash = 0;
for (; *s; s++) {
cont:
@@ -3507,20 +3507,6 @@ zshtokenize(char *s, int flags)
*t = Inang;
*s = Outang;
break;
- case '[':
- if (bslash)
- s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull;
- else {
- seen_brct = 1;
- *s = Inbrack;
- }
- break;
- case '-':
- if (bslash)
- s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull;
- else if (seen_brct) /* see corresonding code in lex.c */
- *s = Dash;
- break;
case '(':
case '|':
case ')':
@@ -3531,10 +3517,13 @@ zshtokenize(char *s, int flags)
case '^':
case '#':
case '~':
+ case '[':
case ']':
case '*':
case '?':
case '=':
+ case '-':
+ case '!':
for (t = ztokens; *t; t++) {
if (*t == *s) {
if (bslash)
diff --git a/Src/lex.c b/Src/lex.c
index 9a7e3b8fe..0202d2559 100644
--- a/Src/lex.c
+++ b/Src/lex.c
@@ -35,7 +35,7 @@
/* tokens */
/**/
-mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,-'\"\\\\";
+mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,-!'\"\\\\";
/* parts of the current token */
@@ -395,8 +395,9 @@ ctxtlex(void)
#define LX2_BQUOTE 16
#define LX2_COMMA 17
#define LX2_DASH 18
-#define LX2_OTHER 19
-#define LX2_META 20
+#define LX2_BANG 19
+#define LX2_OTHER 20
+#define LX2_META 21
static unsigned char lexact1[256], lexact2[256], lextok2[256];
@@ -406,10 +407,10 @@ initlextabs(void)
{
int t0;
static char *lx1 = "\\q\n;!&|(){}[]<>";
- static char *lx2 = ";)|$[]~({}><=\\\'\"`,-";
+ static char *lx2 = ";)|$[]~({}><=\\\'\"`,-!";
for (t0 = 0; t0 != 256; t0++) {
- lexact1[t0] = LX1_OTHER;
+ lexact1[t0] = LX1_OTHER;
lexact2[t0] = LX2_OTHER;
lextok2[t0] = t0;
}
@@ -1361,12 +1362,20 @@ gettokstr(int c, int sub)
*/
if (seen_brct)
c = Dash;
- else
- c = '-';
- break;
- }
- add(c);
- c = hgetc();
+ else
+ c = '-';
+ break;
+ case LX2_BANG:
+ /*
+ * Same logic as Dash, for ! to perform negation in range.
+ */
+ if (seen_brct)
+ c = Bang;
+ else
+ c = '!';
+ }
+ add(c);
+ c = hgetc();
if (intpos)
intpos--;
if (lexstop)
diff --git a/Src/pattern.c b/Src/pattern.c
index d2b8c590b..72c7d97d5 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -247,7 +247,7 @@ typedef unsigned long zrange_t;
*/
static const char zpc_chars[ZPC_COUNT] = {
'/', '\0', Bar, Outpar, Tilde, Inpar, Quest, Star, Inbrack, Inang,
- Hat, Pound, Bnullkeep, Quest, Star, '+', '!', '@'
+ Hat, Pound, Bnullkeep, Quest, Star, '+', Bang, '!', '@'
};
/*
@@ -257,7 +257,7 @@ static const char zpc_chars[ZPC_COUNT] = {
/**/
mod_export const char *zpc_strings[ZPC_COUNT] = {
NULL, NULL, "|", NULL, "~", "(", "?", "*", "[", "<",
- "^", "#", NULL, "?(", "*(", "+(", "!(", "@("
+ "^", "#", NULL, "?(", "*(", "+(", "!(", "\\!(", "@("
};
/*
@@ -481,7 +481,7 @@ patcompcharsset(void)
*/
zpc_special[ZPC_KSH_QUEST] = zpc_special[ZPC_KSH_STAR] =
zpc_special[ZPC_KSH_PLUS] = zpc_special[ZPC_KSH_BANG] =
- zpc_special[ZPC_KSH_AT] = Marker;
+ zpc_special[ZPC_KSH_BANG2] = zpc_special[ZPC_KSH_AT] = Marker;
}
/*
* Note that if we are using KSHGLOB, then we test for a following
@@ -1268,6 +1268,8 @@ patcomppiece(int *flagp, int paren)
kshchar = STOUC('+');
else if (*patparse == zpc_special[ZPC_KSH_BANG])
kshchar = STOUC('!');
+ else if (*patparse == zpc_special[ZPC_KSH_BANG2])
+ kshchar = STOUC('!');
else if (*patparse == zpc_special[ZPC_KSH_AT])
kshchar = STOUC('@');
else if (*patparse == zpc_special[ZPC_KSH_STAR])
@@ -1424,7 +1426,7 @@ patcomppiece(int *flagp, int paren)
DPUTS(zpc_special[ZPC_INBRACK] == Marker,
"Treating '[' as pattern character although disabled");
flags |= P_SIMPLE;
- if (*patparse == Hat || *patparse == '^' || *patparse == '!') {
+ if (*patparse == Hat || *patparse == Bang) {
patparse++;
starter = patnode(P_ANYBUT);
} else
@@ -4245,7 +4247,8 @@ haswilds(char *str)
((str[-1] == Quest && !zpc_disables[ZPC_KSH_QUEST]) ||
(str[-1] == Star && !zpc_disables[ZPC_KSH_STAR]) ||
(str[-1] == '+' && !zpc_disables[ZPC_KSH_PLUS]) ||
- (str[-1] == '!' && !zpc_disables[ZPC_KSH_BANG]) ||
+ (str[-1] == Bang && !zpc_disables[ZPC_KSH_BANG]) ||
+ (str[-1] == '!' && !zpc_disables[ZPC_KSH_BANG2]) ||
(str[-1] == '@' && !zpc_disables[ZPC_KSH_AT]))))
return 1;
break;
diff --git a/Src/zsh.h b/Src/zsh.h
index 6ee2a9c8d..b83b8bdbb 100644
--- a/Src/zsh.h
+++ b/Src/zsh.h
@@ -193,29 +193,30 @@ struct mathfunc {
#define Qtick ((char) 0x99)
#define Comma ((char) 0x9a)
#define Dash ((char) 0x9b) /* Only in patterns */
+#define Bang ((char) 0x9c) /* Only in patterns */
/*
* Marks the last of the group above.
* Remaining tokens are even more special.
*/
-#define LAST_NORMAL_TOK Dash
+#define LAST_NORMAL_TOK Bang
/*
* Null arguments: placeholders for single and double quotes
* and backslashes.
*/
-#define Snull ((char) 0x9c)
-#define Dnull ((char) 0x9d)
-#define Bnull ((char) 0x9e)
+#define Snull ((char) 0x9d)
+#define Dnull ((char) 0x9e)
+#define Bnull ((char) 0x9f)
/*
* Backslash which will be returned to "\" instead of being stripped
* when we turn the string into a printable format.
*/
-#define Bnullkeep ((char) 0x9f)
+#define Bnullkeep ((char) 0xa0)
/*
* Null argument that does not correspond to any character.
* This should be last as it does not appear in ztokens and
* is used to initialise the IMETA type in inittyptab().
*/
-#define Nularg ((char) 0xa0)
+#define Nularg ((char) 0xa1)
/*
* Take care to update the use of IMETA appropriately when adding
@@ -226,7 +227,7 @@ struct mathfunc {
* Also used in pattern character arrays as guaranteed not to
* mark a character in a string.
*/
-#define Marker ((char) 0xa1)
+#define Marker ((char) 0xa2)
/* chars that need to be quoted if meant literally */
@@ -1549,6 +1550,7 @@ enum zpc_chars {
ZPC_KSH_STAR, /* * for *(...) in KSH_GLOB */
ZPC_KSH_PLUS, /* + for +(...) in KSH_GLOB */
ZPC_KSH_BANG, /* ! for !(...) in KSH_GLOB */
+ ZPC_KSH_BANG2, /* ! for !(...) in KSH_GLOB, untokenised */
ZPC_KSH_AT, /* @ for @(...) in KSH_GLOB */
ZPC_COUNT /* Number of special chararacters */
};
diff --git a/Test/D02glob.ztst b/Test/D02glob.ztst
index 89256e303..a6b704a8e 100644
--- a/Test/D02glob.ztst
+++ b/Test/D02glob.ztst
@@ -622,3 +622,36 @@
0:quoted - works in pattern in parameter
>bcdef
>cdef
+
+ [[ a != [^a] ]]
+0:^ active in character class if not quoted
+
+ [[ a = ['^a'] ]]
+0:^ not active in character class if quoted
+
+ [[ a != [!a] ]]
+0:! active in character class if not quoted
+
+ [[ a = ['!a'] ]]
+0:! not active in character class if quoted
+
+ # Actually, we don't need the quoting here,
+ # c.f. the next test. This just makes it look
+ # more standard.
+ cset="^a-z"
+ [[ "^" = ["$cset"] ]] || print Fail 1
+ [[ "a" = ["$cset"] ]] || print Fail 2
+ [[ "-" = ["$cset"] ]] || print Fail 3
+ [[ "z" = ["$cset"] ]] || print Fail 4
+ [[ "1" != ["$cset"] ]] || print Fail 5
+ [[ "b" != ["$cset"] ]] || print Fail 6
+0:character set specified as quoted variable
+
+ cset="^a-z"
+ [[ "^" = [$~cset] ]] || print Fail 1
+ [[ "a" != [$~cset] ]] || print Fail 2
+ [[ "-" = [$~cset] ]] || print Fail 3
+ [[ "z" != [$~cset] ]] || print Fail 4
+ [[ "1" = [$~cset] ]] || print Fail 5
+ [[ "b" != [$~cset] ]] || print Fail 6
+0:character set specified as active variable