summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 5
-rw-r--r-- Doc/Zsh/options.yo 30
-rwxr-xr-x Misc/globtests 1
-rw-r--r-- Src/options.c 8
-rw-r--r-- Src/pattern.c 4
-rw-r--r-- Test/D02glob.ztst 9
-rw-r--r-- Test/D07multibyte.ztst 38
7 files changed, 79 insertions, 16 deletions
diff --git a/ChangeLog b/ChangeLog
index 62f8cec19..cfc6373e3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
2006-07-25 Peter Stephenson <pws@csr.com>
+ * 22557: Doc/Zsh/options.yo, Misc/globtests, Src/options.c,
+ Src/pattern.c, Test/D02glob.ztst, Test/D07multibyte.ztst:
+ Turn on multibyte option by default for MULTIBYTE_SUPPORT and fix
+ tests and patterns.
+
* unposted: Src/pattern.c, Src/utils.c: minor typos in
22556 found when MULTIBYTE_SUPPORT is not defined.
diff --git a/Doc/Zsh/options.yo b/Doc/Zsh/options.yo
index 589ed79cb..02d8fa046 100644
--- a/Doc/Zsh/options.yo
+++ b/Doc/Zsh/options.yo
@@ -411,19 +411,31 @@ item(tt(MARK_DIRS) (tt(-8), ksh: tt(-X)))(
Append a trailing `tt(/)' to all directory
names resulting from filename generation (globbing).
)
-pindex(MULTIBYTE)
+pindex(MULTIBYTE <D>)
cindex(characters, multibyte, in expansion and globbing)
cindex(multibyte characters, in expansion and globbing)
item(tt(MULTIBYTE))(
-Respect multibyte characters when found during pattern matching.
-When this option is set, characters strings are examined using the
+Respect multibyte characters when found in strings.
+When this option is set, strings are examined using the
system library to determine how many bytes form a character, depending
-on the current locale. If the option is unset
-(or the shell was not compiled with the configuration option
-tt(MULTIBYTE_SUPPORT)) a single byte is always treated as a single
-character. The option will eventually be extended to cover expansion.
-Note, however, that it does not affect the shell's editor, which always
-uses the locale to determine multibyte characters.
+on the current locale. This affects the way characters are counted in
+pattern matching, parameter values and various delimiters.
+
+The option is on by default if the shell was compiled with
+tt(MULTIBYTE_SUPPORT); otherwise it is off by default and has no effect if
+turned on.
+
+If the option is off a single byte is always treated as a single
+character. This setting is designed purely for examining strings
+known to contain raw bytes or other values that may not be characters
+in the current locale. It is not necessary to unset the option merely
+because the character set for the current locale does not contain multibyte
+characters.
+
+The option does not affect the shell's editor, which always uses the
+locale to determine multibyte characters. This is because
+the character set displayed by the terminal emulator is independent of
+shell settings.
)
pindex(NOMATCH)
cindex(globbing, no matches)
diff --git a/Misc/globtests b/Misc/globtests
index 232fe3daa..a5f7c4a00 100755
--- a/Misc/globtests
+++ b/Misc/globtests
@@ -182,6 +182,5 @@ f atest/path *((#s)|/)test((#e)|/)*
f path/testy *((#s)|/)test((#e)|/)*
f path/testy/ohyes *((#s)|/)test((#e)|/)*
f path/atest/ohyes *((#s)|/)test((#e)|/)*
-t bjrn *[]*
EOT
print "$failed tests failed."
diff --git a/Src/options.c b/Src/options.c
index 307bd5430..05e878687 100644
--- a/Src/options.c
+++ b/Src/options.c
@@ -166,7 +166,13 @@ static struct optname optns[] = {
{{NULL, "markdirs", 0}, MARKDIRS},
{{NULL, "menucomplete", 0}, MENUCOMPLETE},
{{NULL, "monitor", OPT_SPECIAL}, MONITOR},
-{{NULL, "multibyte", 0/*TBD*/}, MULTIBYTE},
+{{NULL, "multibyte",
+#ifdef MULTIBYTE_SUPPORT
+ OPT_ALL
+#else
+ 0
+#endif
+ }, MULTIBYTE},
{{NULL, "multios", OPT_EMULATE|OPT_ZSH}, MULTIOS},
{{NULL, "nomatch", OPT_EMULATE|OPT_NONBOURNE},NOMATCH},
{{NULL, "notify", OPT_ZSH}, NOTIFY},
diff --git a/Src/pattern.c b/Src/pattern.c
index 24077768d..9ae00ca94 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -343,7 +343,7 @@ metacharinc(char **x)
/* Error. Treat as single byte. */
/* Reset the shift state for next time. */
memset(&shiftstate, 0, sizeof(shiftstate));
- return (wchar_t) *(*x)++;
+ return (wchar_t) STOUC(*(*x)++);
}
#else
@@ -595,7 +595,7 @@ patcompile(char *exp, int inflags, char **endexp)
while (oplen--) {
if (imeta(*opnd)) {
*dst++ = Meta;
- *dst++ = *opnd ^ 32;
+ *dst++ = *opnd++ ^ 32;
} else {
*dst++ = *opnd++;
}
diff --git a/Test/D02glob.ztst b/Test/D02glob.ztst
index 409a73e30..7c76414f0 100644
--- a/Test/D02glob.ztst
+++ b/Test/D02glob.ztst
@@ -6,7 +6,9 @@
mkdir glob.tmp/dir3/subdir
: >glob.tmp/{,{dir1,dir2}/}{a,b,c}
- globtest () { $ZTST_testdir/../Src/zsh -f $ZTST_srcdir/../Misc/$1 }
+ globtest () {
+ $ZTST_testdir/../Src/zsh -f $ZTST_srcdir/../Misc/$1
+ }
regress_absolute_path_and_core_dump() {
local absolute_dir=$(cd glob.tmp && pwd -P)
@@ -175,7 +177,6 @@
>1: [[ path/testy = *((#s)|/)test((#e)|/)* ]]
>1: [[ path/testy/ohyes = *((#s)|/)test((#e)|/)* ]]
>1: [[ path/atest/ohyes = *((#s)|/)test((#e)|/)* ]]
->0: [[ bjrn = *[]* ]]
>0 tests failed.
globtest globtests.ksh
@@ -263,6 +264,10 @@
>0: [[ Modules = (#i)*m* ]]
>0 tests failed.
+ (unsetopt multibyte
+ [[ bjrn = *[]* ]])
+0:single byte match with top bit set
+
( regress_absolute_path_and_core_dump )
0:exclusions regression test
>
diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst
index 683e8350e..263a7a44e 100644
--- a/Test/D07multibyte.ztst
+++ b/Test/D07multibyte.ztst
@@ -176,7 +176,7 @@
?(eval):1: command not found: hähä=3
foo="Ølaf«Ødd«øpénëd«ån«àpple"
- print -l ${(s.«.)foo}
+ print -l ${(s.«.)foo}
ioh="Ἐν ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ θεὸς ἦν ὁ λόγος."
print -l ${=ioh}
print ${(w)#ioh}
@@ -228,3 +228,39 @@
0:read multibyte characters
<«»ignored
>«»
+
+ # See if the system grokks first-century Greek...
+ ioh="Ἐν ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ θεὸς ἦν ὁ λόγος."
+ for (( i = 1; i <= ${#ioh}; i++ )); do
+ # FC3 doesn't recognise ῇ (U+1FC7: Greek small letter eta with
+ # perispomeni and ypogegrammeni, of course) as a lower case character.
+ if [[ $ioh[i] != [[:lower:]] && $i != 7 ]]; then
+ for tp in upper space punct invalid; do
+ if [[ $tp = invalid || $ioh[i] = [[:${tp}:]] ]]; then
+ print "$i: $tp"
+ break
+ fi
+ done
+ fi
+ done
+0:isw* functions on non-ASCII wide characters
+>1: upper
+>3: space
+>8: space
+>11: space
+>13: space
+>19: punct
+>20: space
+>24: space
+>26: space
+>32: space
+>35: space
+>40: space
+>44: space
+>49: punct
+>50: space
+>54: space
+>59: space
+>62: space
+>64: space
+>70: punct