summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 5
-rw-r--r-- Doc/Zsh/expn.yo 87
-rw-r--r-- Src/pattern.c 32
-rw-r--r-- Test/D02glob.ztst 25
4 files changed, 135 insertions, 14 deletions
diff --git a/ChangeLog b/ChangeLog
index a254f0a32..4b61f4ee5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2005-05-09 Peter Stephenson <pws@csr.com>
+
+ * 21211: Doc/Zsh/expn.yo, Src/pattern.c, Test/D02glob.ztst:
+ Add [[:IDENT:]], [[:IFS:]], [[:IFSSPACE:]], [[:WORD:]] tests.
+
2005-05-08 Bart Schaefer <schaefer@zsh.org>
* 21235, 21236: Completion/Unix/Command/_ssh: fix remote filename
diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo
index a16e252e6..a6235222f 100644
--- a/Doc/Zsh/expn.yo
+++ b/Doc/Zsh/expn.yo
@@ -1224,19 +1224,82 @@ A `tt(-)' or `tt(])' may be matched by including it as the
first character in the list.
cindex(character classes)
There are also several named classes of characters, in the form
-`tt([:)var(name)tt(:])' with the following meanings: `tt([:alnum:])'
-alphanumeric, `tt([:alpha:])' alphabetic,
-`tt([:ascii:])' 7-bit,
-`tt([:blank:])' space or tab,
-`tt([:cntrl:])' control character, `tt([:digit:])' decimal
-digit, `tt([:graph:])' printable character except whitespace,
-`tt([:lower:])' lowercase letter, `tt([:print:])' printable character,
-`tt([:punct:])' printable character neither alphanumeric nor whitespace,
-`tt([:space:])' whitespace character, `tt([:upper:])' uppercase letter,
-`tt([:xdigit:])' hexadecimal digit. These use the macros provided by
+`tt([:)var(name)tt(:])' with the following meanings.
+The first set uses the macros provided by
the operating system to test for the given character combinations,
-including any modifications due to local language settings: see
-manref(ctype)(3). Note that the square brackets are additional
+including any modifications due to local language settings; see
+manref(ctype)(3):
+
+startitem()
+item(tt([:alnum:]))(
+The character is alphanumeric
+)
+item(tt([:alpha:]))
+(
+The character is alphabetic
+)
+item(tt([:ascii:]))(
+The character is 7-bit, i.e. is a single-byte character without
+the top bit set.
+)
+item(tt([:blank:]))(
+The character is either space or tab
+)
+item(tt([:cntrl:]))(
+The character is a control character
+)
+item(tt([:digit:]))(
+The character is a decimal digit
+)
+item(tt([:graph:]))(
+The character is a printable character other than whitespace
+)
+item(tt([:lower:]))(
+The character is a lowercase letter
+)
+item(tt([:print:]))(
+The character is printable
+)
+item(tt([:punct:]))(
+The character is printable but neither alphanumeric nor whitespace
+)
+item(tt([:space:]))(
+The character is whitespace
+)
+item(tt([:upper:]))(
+The character is an uppercase letter
+)
+item(tt([:xdigit:]))(
+The character is a hexadecimal digit
+)
+enditem()
+
+Another set of named classes is handled internally by the shell and
+is not sensitive to the locale:
+
+startitem()
+item(tt([:IDENT:]))(
+The character is allowed to form part of a shell identifier, such
+as a parameter name
+)
+item(tt([:IFS:]))(
+The character is used as an input field separator, i.e. is contained in the
+tt(IFS) parameter
+)
+item(tt([:IFSSPACE:]))(
+The character is an IFS white space character; see the documentation
+for tt(IFS) in
+ifzman(the zmanref(zshparams) manual page)\
+ifnzman(noderef(Parameters Used By The Shell))\
+.
+)
+item(tt([:WORD:]))(
+The character is treated as part of a word; this test is sensitive
+to the value of the tt(WORDCHARS) parameter
+)
+enditem()
+
+Note that the square brackets are additional
to those enclosing the whole set of characters, so to test for a
single alphanumeric character you need `tt([[:alnum:]])'. Named
character sets can be used alongside other types,
diff --git a/Src/pattern.c b/Src/pattern.c
index ed88bb7ce..393d9bf41 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -193,8 +193,12 @@ typedef union upat *Upat;
#define PP_SPACE 11
#define PP_UPPER 12
#define PP_XDIGIT 13
-#define PP_UNKWN 14
-#define PP_RANGE 15
+#define PP_IDENT 14
+#define PP_IFS 15
+#define PP_IFSSPACE 16
+#define PP_WORD 17
+#define PP_UNKWN 18
+#define PP_RANGE 19
#define P_OP(p) ((p)->l & 0xff)
#define P_NEXT(p) ((p)->l >> 8)
@@ -1118,6 +1122,14 @@ patcomppiece(int *flagp)
ch = PP_UPPER;
else if (!strncmp(patparse, "xdigit", len))
ch = PP_XDIGIT;
+ else if (!strncmp(patparse, "IDENT", len))
+ ch = PP_IDENT;
+ else if (!strncmp(patparse, "IFS", len))
+ ch = PP_IFS;
+ else if (!strncmp(patparse, "IFSSPACE", len))
+ ch = PP_IFSSPACE;
+ else if (!strncmp(patparse, "WORD", len))
+ ch = PP_WORD;
else
ch = PP_UNKWN;
patparse = nptr + 2;
@@ -2724,6 +2736,22 @@ patmatchrange(char *range, int ch)
if (isxdigit(ch))
return 1;
break;
+ case PP_IDENT:
+ if (iident(ch))
+ return 1;
+ break;
+ case PP_IFS:
+ if (isep(ch))
+ return 1;
+ break;
+ case PP_IFSSPACE:
+ if (iwsep(ch))
+ return 1;
+ break;
+ case PP_WORD:
+ if (iword(ch))
+ return 1;
+ break;
case PP_RANGE:
range++;
r1 = STOUC(UNMETA(range));
diff --git a/Test/D02glob.ztst b/Test/D02glob.ztst
index e423176a3..d21aaf8c4 100644
--- a/Test/D02glob.ztst
+++ b/Test/D02glob.ztst
@@ -323,3 +323,28 @@
print glob.tmp/ra=1.0_et=3.5/???
0:Bug with intermediate paths with plain strings but tokenized characters
>glob.tmp/ra=1.0_et=3.5/foo
+
+ doesmatch() {
+ setopt localoptions extendedglob
+ print -n $1 $2' '
+ if [[ $1 = $~2 ]]; then print yes; else print no; fi;
+ }
+ doesmatch MY_IDENTIFIER '[[:IDENT:]]##'
+ doesmatch YOUR:IDENTIFIER '[[:IDENT:]]##'
+ IFS=$'\n' doesmatch $'\n' '[[:IFS:]]'
+ IFS=' ' doesmatch $'\n' '[[:IFS:]]'
+ IFS=':' doesmatch : '[[:IFSSPACE:]]'
+ IFS=' ' doesmatch ' ' '[[:IFSSPACE:]]'
+ WORDCHARS="" doesmatch / '[[:WORD:]]'
+ WORDCHARS="/" doesmatch / '[[:WORD:]]'
+0:Named character sets handled internally
+>MY_IDENTIFIER [[:IDENT:]]## yes
+>YOUR:IDENTIFIER [[:IDENT:]]## no
+>
+> [[:IFS:]] yes
+>
+> [[:IFS:]] no
+>: [[:IFSSPACE:]] no
+> [[:IFSSPACE:]] yes
+>/ [[:WORD:]] no
+>/ [[:WORD:]] yes