summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog13
-rw-r--r--Src/Zle/comp.h15
-rw-r--r--Src/Zle/complete.c16
-rw-r--r--Src/Zle/compmatch.c640
-rw-r--r--Src/Zle/computil.c4
-rw-r--r--Src/Zle/zle_utils.c7
-rw-r--r--Src/pattern.c20
7 files changed, 401 insertions, 314 deletions
diff --git a/ChangeLog b/ChangeLog
index c31491971..9fe8c2bca 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,12 +1,17 @@
+2008-11-15 Peter Stephenson <p.w.stephenson@ntlworld.com>
+
+ * 26047: Src/pattern.c, Src/Zle/comp.h, Src/Zle/compmatch.c,
+ Src/Zle/complete.c, Src/Zle/computil.c,
+ Src/Zle/zle_utils.c: convert lower levels of completion
+ matching to user multibyte strings / wide characters.
+
+ * Phil (unposted): README: another typo.
+
2008-11-15 Clint Adams <clint@zsh.org>
* 26046: Functions/TCP/tcp_send: return an error if session's fd is
unusable.
-2008-11-15 Peter Stephenson <p.w.stephenson@ntlworld.com>
-
- * Phil (unposted): README: another typo.
-
2008-11-13 Peter Stephenson <p.w.stephenson@ntlworld.com>
* 26042 with some fixes from 26043 (Mikael): README,
diff --git a/Src/Zle/comp.h b/Src/Zle/comp.h
index 7ac051c25..e96c4217c 100644
--- a/Src/Zle/comp.h
+++ b/Src/Zle/comp.h
@@ -190,10 +190,7 @@ struct cpattern {
* Note the allocated length may be longer
* than the null-terminated string.
*/
- int chr; /* if a single character, it
- * TODO: eventually should be a
- * convchar_t.
- */
+ convchar_t chr; /* if a single character, it */
} u;
};
@@ -201,9 +198,17 @@ struct cpattern {
* For now this just handles single-byte characters.
* TODO: this will change.
*/
+#ifdef MULTIBYTE_SUPPORT
+#define PATMATCHRANGE(r, c, ip, mtp) mb_patmatchrange(r, c, ip, mtp)
+#define PATMATCHINDEX(r, i, cp, mtp) mb_patmatchindex(r, i, cp, mtp)
+#define CONVCAST(c) ((wchar_t)(c))
+#define CHR_INVALID (WEOF)
+#else
#define PATMATCHRANGE(r, c, ip, mtp) patmatchrange(r, c, ip, mtp)
#define PATMATCHINDEX(r, i, cp, mtp) patmatchindex(r, i, cp, mtp)
-#define CONVCAST(c) (c)
+#define CONVCAST(c) (c)
+#define CHR_INVALID (-1)
+#endif
/* This is a special return value for parse_cmatcher(), *
* signalling an error. */
diff --git a/Src/Zle/complete.c b/Src/Zle/complete.c
index bc56bc05a..6398fd3e7 100644
--- a/Src/Zle/complete.c
+++ b/Src/Zle/complete.c
@@ -381,11 +381,12 @@ parse_pattern(char *name, char **sp, int *lp, char e, int *err)
{
Cpattern ret = NULL, r = NULL, n;
char *s = *sp;
- int inchar;
- int l = 0;
+ convchar_t inchar;
+ int l = 0, inlen;
*err = 0;
+ MB_METACHARINIT();
while (*s && (e ? (*s != e) : !inblank(*s))) {
n = (Cpattern) hcalloc(sizeof(*n));
n->next = NULL;
@@ -409,11 +410,12 @@ parse_pattern(char *name, char **sp, int *lp, char e, int *err)
if (*s == '\\' && s[1])
s++;
- if (*s == Meta)
- inchar = STOUC(*++s) ^ 32;
- else
- inchar = STOUC(*s);
- s++;
+ inlen = MB_METACHARLENCONV(s, &inchar);
+#ifdef MULTIBYTE_SUPPORT
+ if (inchar == WEOF)
+ inchar = (convchar_t)(*s == Meta ? s[1] ^ 32 : *s);
+#endif
+ s += inlen;
n->tp = CPAT_CHAR;
n->u.chr = inchar;
}
diff --git a/Src/Zle/compmatch.c b/Src/Zle/compmatch.c
index e651ed9ee..77dc5958d 100644
--- a/Src/Zle/compmatch.c
+++ b/Src/Zle/compmatch.c
@@ -1152,11 +1152,10 @@ comp_match(char *pfx, char *sfx, char *w, Patprog cp, Cline *clp, int qu,
*/
/**/
-mod_export int
-pattern_match1(Cpattern p, int c, int *mtp)
+mod_export convchar_t
+pattern_match1(Cpattern p, convchar_t c, int *mtp)
{
- /* TODO: should become convchar_t */
- int ind;
+ convchar_t ind;
*mtp = 0;
switch (p->tp) {
@@ -1193,29 +1192,31 @@ pattern_match1(Cpattern p, int c, int *mtp)
* wind is the index returned by a pattern match on the word pattern,
* with type wmtp.
* wchr is the word character.
- * Return -1 if no matching character, else the character.
+ * Return CHR_INVALID if no matching character, else the character.
*
* Only makes sense if lp->tp == CPAT_EQUIV and the (unseen) word
* pattern also has that type.
*/
/**/
-mod_export int
-pattern_match_equivalence(Cpattern lp, int wind, int wmtp, int wchr)
+mod_export convchar_t
+pattern_match_equivalence(Cpattern lp, convchar_t wind, int wmtp,
+ convchar_t wchr)
{
- int lchr, lmtp;
+ convchar_t lchr;
+ int lmtp;
if (!PATMATCHINDEX(lp->u.str, wind-1, &lchr, &lmtp)) {
/*
* No equivalent. No possible match; give up.
*/
- return -1;
+ return CHR_INVALID;
}
/*
* If we matched an exact character rather than a range
* type, return it.
*/
- if (lchr != -1)
+ if (lchr != CHR_INVALID)
return lchr;
/*
@@ -1223,9 +1224,9 @@ pattern_match_equivalence(Cpattern lp, int wind, int wmtp, int wchr)
* version of the word character.
*/
if (wmtp == PP_UPPER && lmtp == PP_LOWER)
- return tulower(wchr);
+ return ZC_tolower(wchr);
else if (wmtp == PP_LOWER && lmtp == PP_UPPER)
- return tuupper(wchr);
+ return ZC_toupper(wchr);
else if (wmtp == lmtp) {
/*
* Be lenient and allow identical replacements
@@ -1238,25 +1239,21 @@ pattern_match_equivalence(Cpattern lp, int wind, int wmtp, int wchr)
/*
* Non-matching generic types; this can't work.
*/
- return -1;
+ return CHR_INVALID;
}
}
/*
* Check if the given pattern matches the given string.
- * p and s are either anchor or line pattern and string;
- * wp and ws are word (candidate) pattern and string
- *
- * If only one pattern is given, we just check if characters match.
- * If both line and word are given, we check that characters match
- * for {...} classes by comparing positions in the strings.
+ * p is either an anchor or line pattern;
+ * wp and wsc are word (candidate) pattern and string (wsclen characters)
*
- * Patterns and strings are always passed in pairs, so it is enough
- * to check for non-NULL wp. p should always be present.
+ * Check that characters match for {...} classes by comparing positions in the
+ * strings.
*
- * If prestrict is not NULL, it is a chain of patterns at least as long
+ * prestrict is a chain of patterns at least as long
* as the line string. In this case we are still assembling the line at
- * s (which has been allocated but doesn't yet contain anything useful)
+ * newline (which has been allocated but doesn't yet contain anything useful)
* and must continue to do so as we go along; prestrict gives
* restrictions on the line character to be applied along side the other
* patterns. In the simple case a restriction is a character to be put
@@ -1264,27 +1261,22 @@ pattern_match_equivalence(Cpattern lp, int wind, int wmtp, int wchr)
* deduce an actual matching character. Note prestrict is never an
* equivalence class. In extreme cases we can't deduce a unique
* character; then the match fails.
+ *
+ * There is no line string argument: the line is assembled in newline.
*/
/**/
-mod_export int
-pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
- Cpattern prestrict)
+static int
+pattern_match_restrict(Cpattern p, Cpattern wp, convchar_t *wsc, int wsclen,
+ Cpattern prestrict, ZLE_STRING_T newline)
{
- int c, ind;
- int wc, wind;
- int len = 0, wlen, mt, wmt;
+ convchar_t c;
+ convchar_t ind, wind;
+ int mt, wmt;
- while (p && wp && (prestrict || *s) && *ws) {
+ while (p && wp && wsclen && prestrict) {
/* First test the word character */
- if (*ws == Meta) {
- wc = STOUC(ws[1]) ^ 32;
- wlen = 2;
- } else {
- wc = STOUC(*ws);
- wlen = 1;
- }
- wind = pattern_match1(wp, wc, &wmt);
+ wind = pattern_match1(wp, *wsc, &wmt);
if (!wind)
return 0;
@@ -1292,55 +1284,45 @@ pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
* Now the line character; deal with the case where
* we don't yet have it, only a restriction on it.
*/
- if (prestrict) {
- if (prestrict->tp == CPAT_CHAR) {
+ if (prestrict->tp == CPAT_CHAR) {
+ /*
+ * Easy case: restricted to an exact character on
+ * the line. Proceed as normal.
+ */
+ c = prestrict->u.chr;
+ } else {
+ if (p->tp == CPAT_CHAR) {
/*
- * Easy case: restricted to an exact character on
- * the line. Procede as normal.
+ * Normal line pattern is an exact character: as
+ * long as this matches prestrict, we can proceed
+ * as usual.
*/
- c = prestrict->u.chr;
- } else {
- if (p->tp == CPAT_CHAR) {
- /*
- * Normal line pattern is an exact character: as
- * long as this matches prestrict, we can proceed
- * as usual.
- */
- c = p->u.chr;
- } else if (p->tp == CPAT_EQUIV) {
- /*
- * An equivalence, so we can deduce the character
- * backwards from the word pattern and see if it
- * matches prestrict.
- */
- if ((c = pattern_match_equivalence(p, wind, wmt, wc)) == -1)
- return 0;
- } else {
- /*
- * Not an equivalence, so that means we must match
- * the word (not just the word pattern), so grab it
- * and make sure it fulfills our needs. I think.
- * Not 100% sure about that, but what else can
- * we do? We haven't actually been passed a string
- * from the command line.
- */
- c = wc;
- }
- /* Character so deduced must match the restriction. */
- if (!pattern_match1(prestrict, c, &mt))
+ c = p->u.chr;
+ } else if (p->tp == CPAT_EQUIV) {
+ /*
+ * An equivalence, so we can deduce the character
+ * backwards from the word pattern and see if it
+ * matches prestrict.
+ */
+ if ((c = pattern_match_equivalence(p, wind, wmt, *wsc)) ==
+ CHR_INVALID)
return 0;
- }
- len = imeta(c) ? 2 : 1;
- } else {
- /* We have the character itself. */
- if (*s == Meta) {
- c = STOUC(s[1]) ^ 32;
- len = 2;
} else {
- c = STOUC(*s);
- len = 1;
+ /*
+ * Not an equivalence, so that means we must match
+ * the word (not just the word pattern), so grab it
+ * and make sure it fulfills our needs. I think.
+ * Not 100% sure about that, but what else can
+ * we do? We haven't actually been passed a string
+ * from the command line.
+ */
+ c = *wsc;
}
+ /* Character so deduced must match the restriction. */
+ if (!pattern_match1(prestrict, c, &mt))
+ return 0;
}
+
/*
* If either is "?", they match each other; no further tests.
* Apply this even if the character wasn't convertable;
@@ -1364,7 +1346,7 @@ pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
*/
if ((mt == PP_LOWER || mt == PP_UPPER) &&
(wmt == PP_LOWER || wmt == PP_UPPER)) {
- if (tulower(c) != tulower(wc))
+ if (ZC_tolower(c) != ZC_tolower(*wsc))
return 0;
} else {
/* Other different classes can't match. */
@@ -1373,71 +1355,46 @@ pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
}
}
- if (prestrict) {
- /* We need to assemble the line */
- if (imeta(c)) {
- *s++ = Meta;
- *s++ = c ^ 32;
- } else {
- *s++ = c;
- }
- prestrict = prestrict->next;
- } else
- s += len;
- ws += wlen;
+ /* We need to assemble the line */
+ *newline++ = (ZLE_CHAR_T)c;
+ prestrict = prestrict->next;
+ wsc++;
+ wsclen--;
p = p->next;
wp = wp->next;
}
- while (p && (prestrict || *s)) {
- if (prestrict) {
- /*
- * As above, but with even less info to go on.
- * (Can this happen?) At least handle the cases where
- * one of our patterns has given us a specific character.
- */
- if (prestrict->tp == CPAT_CHAR) {
- c = prestrict->u.chr;
- } else {
- if (p->tp == CPAT_CHAR) {
- c = p->u.chr;
- } else {
- /*
- * OK. Here we are in a function with just a line
- * pattern and another pattern to restrict the
- * characters that can go on the line, and no actual
- * characters. We're matching two patterns against
- * one another to generate a character to insert.
- * This is a bit too psychedelic, so I'm going to
- * bale out now. See you on the ground.
- */
- return 0;
- }
- if (!pattern_match1(prestrict, c, &mt))
- return 0;
- }
+ while (p && prestrict) {
+ /*
+ * As above, but with even less info to go on.
+ * (Can this happen?) At least handle the cases where
+ * one of our patterns has given us a specific character.
+ */
+ if (prestrict->tp == CPAT_CHAR) {
+ c = prestrict->u.chr;
} else {
- if (*s == Meta) {
- c = STOUC(s[1]) ^ 32;
- len = 2;
+ if (p->tp == CPAT_CHAR) {
+ c = p->u.chr;
} else {
- c = STOUC(*s);
- len = 1;
+ /*
+ * OK. Here we are in a function with just a line
+ * pattern and another pattern to restrict the
+ * characters that can go on the line, and no actual
+ * characters. We're matching two patterns against
+ * one another to generate a character to insert.
+ * This is a bit too psychedelic, so I'm going to
+ * bale out now. See you on the ground.
+ */
+ return 0;
}
+ if (!pattern_match1(prestrict, c, &mt))
+ return 0;
}
if (!pattern_match1(p, c, &mt))
return 0;
p = p->next;
- if (prestrict) {
- if (imeta(c)) {
- *s++ = Meta;
- *s++ = c ^ 32;
- } else {
- *s++ = c;
- }
- prestrict = prestrict->next;
- } else
- s += len;
+ *newline++ = (ZLE_CHAR_T)c;
+ prestrict = prestrict->next;
}
if (prestrict) {
@@ -1445,35 +1402,158 @@ pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
return 0;
}
- while (wp && *ws) {
+ while (wp && wsclen) {
/* No funny business when we only have the word pattern. */
- if (*ws == Meta) {
- wc = STOUC(ws[1]) ^ 32;
- wlen = 2;
- } else {
- wc = STOUC(*ws);
- wlen = 1;
- }
- if (!pattern_match1(wp, wc, &wmt))
+ if (!pattern_match1(wp, *wsc, &wmt))
return 0;
wp = wp->next;
- ws += wlen;
+ wsc++;
+ wsclen--;
}
return 1;
}
+
/*
* The usual version of pattern matching, without the line string
* being handled by restriction.
+ *
+ * Check if the given pattern matches the given string.
+ * p and s are either anchor or line pattern and string;
+ * wp and ws are word (candidate) pattern and string
+ *
+ * If only one pattern is given, we just check if characters match.
+ * If both line and word are given, we check that characters match
+ * for {...} classes by comparing positions in the strings.
+ *
+ * Patterns and strings are always passed in pairs, so it is enough
+ * to check for non-NULL wp. p should always be present.
*/
/**/
mod_export int
pattern_match(Cpattern p, char *s, Cpattern wp, char *ws)
{
- return pattern_match_restrict(p, s, wp, ws, NULL);
+ convchar_t c, wc;
+ convchar_t ind, wind;
+ int len = 0, wlen, mt, wmt;
+#ifdef MULTIBYTE_SUPPORT
+ mbstate_t lstate, wstate;
+
+ memset(&lstate, 0, sizeof(lstate));
+ memset(&wstate, 0, sizeof(wstate));
+#endif
+
+ while (p && wp && *s && *ws) {
+ /* First test the word character */
+#ifdef MULTIBYTE_SUPPORT
+ wlen = mb_metacharlenconv_r(ws, &wc, &wstate);
+#else
+ if (*ws == Meta) {
+ wc = STOUC(ws[1]) ^ 32;
+ wlen = 2;
+ } else {
+ wc = STOUC(*ws);
+ wlen = 1;
+ }
+#endif
+ wind = pattern_match1(wp, wc, &wmt);
+ if (!wind)
+ return 0;
+
+ /*
+ * Now the line character.
+ */
+#ifdef MULTIBYTE_SUPPORT
+ len = mb_metacharlenconv_r(s, &c, &lstate);
+#else
+ /* We have the character itself. */
+ if (*s == Meta) {
+ c = STOUC(s[1]) ^ 32;
+ len = 2;
+ } else {
+ c = STOUC(*s);
+ len = 1;
+ }
+#endif
+ /*
+ * If either is "?", they match each other; no further tests.
+ * Apply this even if the character wasn't convertible;
+ * there's no point trying to be clever in that case.
+ */
+ if (p->tp != CPAT_ANY || wp->tp != CPAT_ANY)
+ {
+ ind = pattern_match1(p, c, &mt);
+ if (!ind)
+ return 0;
+ if (ind != wind)
+ return 0;
+ if (mt != wmt) {
+ /*
+ * Special case if matching lower vs. upper or
+ * vice versa. The transformed characters must match.
+ * We don't need to check the transformation is
+ * the appropriate one for each character separately,
+ * since that was done in pattern_match1(), so just
+ * compare lower-cased versions of both.
+ */
+ if ((mt == PP_LOWER || mt == PP_UPPER) &&
+ (wmt == PP_LOWER || wmt == PP_UPPER)) {
+ if (ZC_tolower(c) != ZC_tolower(wc))
+ return 0;
+ } else {
+ /* Other different classes can't match. */
+ return 0;
+ }
+ }
+ }
+
+ s += len;
+ ws += wlen;
+ p = p->next;
+ wp = wp->next;
+ }
+
+ while (p && *s) {
+#ifdef MULTIBYTE_SUPPORT
+ len = mb_metacharlenconv_r(s, &c, &lstate);
+#else
+ if (*s == Meta) {
+ c = STOUC(s[1]) ^ 32;
+ len = 2;
+ } else {
+ c = STOUC(*s);
+ len = 1;
+ }
+#endif
+ if (!pattern_match1(p, c, &mt))
+ return 0;
+ p = p->next;
+ s += len;
+ }
+
+ while (wp && *ws) {
+#ifdef MULTIBYTE_SUPPORT
+ wlen = mb_metacharlenconv_r(ws, &wc, &wstate);
+#else
+ if (*ws == Meta) {
+ wc = STOUC(ws[1]) ^ 32;
+ wlen = 2;
+ } else {
+ wc = STOUC(*ws);
+ wlen = 1;
+ }
+#endif
+ if (!pattern_match1(wp, wc, &wmt))
+ return 0;
+ wp = wp->next;
+ ws += wlen;
+ }
+
+ return 1;
}
+
/* This splits the given string into a list of cline structs, separated
* at those places where one of the anchors of an `*' pattern was found.
* plen gives the number of characters on the line that matched this
@@ -1575,30 +1655,45 @@ bld_parts(char *str, int len, int plen, Cline *lp, Cline *lprem)
* buffer line. Then we test if this line matches the string given by
* wlen and word.
*
- * wpat contains pattern that matched previously
- * lpat contains the pattern for line we build
+ * The matcher ) wpat, containing pattern that matched previously
+ * mp gives ) lpat, containing the pattern for line we build
+ * line is the line we are assembling; it is initially empty
* mword is a string that matched wpat before
* word is string that we try to match now
*
* The return value is the length of the string matched in the word, it
* is zero if we couldn't build a line that matches the word.
- *
- * TODO: a lot of the nastiness associated with variable string
- * lengths can go when we switch to wide characters. (Why didn't
- * I just keep line unmetafied and metafy into place at the end? Er...)
*/
/**/
static int
-bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
+bld_line(Cmatcher mp, ZLE_STRING_T line, char *mword, char *word,
+ int wlen, int sfx)
{
Cpattern lpat = mp->line;
Cpattern wpat = mp->word;
Cpattern curgenpat;
- VARARR(struct cpattern, genpatarr, mp->llen);
Cmlist ms;
- int llen, rl;
- char *oword = word, *line = *linep;
+ int llen, rl, l;
+ convchar_t convchr, *wordcp;
+ VARARR(convchar_t, wordchars, wlen);
+ VARARR(struct cpattern, genpatarr, mp->llen);
+
+ /*
+ * We may need to start the "word" array from the end. This
+ * is much easier if we convert it to an array of (possibly wide)
+ * characters.
+ */
+ MB_METACHARINIT();
+ for (l = wlen, wordcp = wordchars; l; l--) {
+ int charlen = MB_METACHARLENCONV(word, &convchr);
+#ifdef MULTIBYTE_SUPPORT
+ if (convchr == WEOF)
+ convchr = (*word == Meta) ? word[1] ^ 32 : *word;
+#endif
+ *wordcp++ = convchr;
+ word += charlen;
+ }
/*
* Loop over all characters. At this stage, line is an empty
@@ -1616,9 +1711,10 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
* when we finally match the line against the set of matchers.
*/
curgenpat = genpatarr;
+ MB_METACHARINIT();
while (lpat) {
- int wchr = (*mword == Meta) ? STOUC(mword[1]) ^ 32 : STOUC(*mword);
- int wmtp, wind;
+ convchar_t wchr, wind;
+ int wmtp, mwordlen;
/*
* If the line pattern is an equivalence, query wpat to find the
* word part of the equivalence. If we don't find one we don't try
@@ -1628,9 +1724,10 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
* the behaviour of the old logic that this replaces.)
*/
if (lpat->tp == CPAT_EQUIV && wpat && *mword) {
+ mwordlen = MB_METACHARLENCONV(mword, &wchr);
wind = pattern_match1(wpat, wchr, &wmtp);
wpat = wpat->next;
- mword += (*mword == Meta) ? 2 : 1;
+ mword += mwordlen;
} else
wind = 0;
if (wind) {
@@ -1638,9 +1735,9 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
* Successful match for word side of equivalence.
* Find the line equivalent.
*/
- int lchr;
+ convchar_t lchr;
if ((lchr = pattern_match_equivalence(lpat, wind, wmtp, wchr))
- == -1) {
+ == CHR_INVALID) {
/*
* No equivalent. No possible match; give up.
*/
@@ -1694,50 +1791,40 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
llen = mp->llen;
rl = 0;
- *line = '\0';
if (sfx)
{
/*
* We need to work backwards from the end of both the
* word and the line strings.
- *
- * Position at the end of the word by counting characters.
*/
- int l = wlen;
- while (l--)
- word += (*word == Meta) ? 2 : 1;
+ wordcp = wordchars + wlen;
/*
- * We construct the line from the end. We've left
- * enough space for possible Meta's.
+ * We construct the line from the end.
*/
- line += 2 * llen;
- *line = '\0';
+ line += llen;
curgenpat = genpatarr + llen;
- } else
+ } else {
+ wordcp = wordchars;
curgenpat = genpatarr;
+ }
/* we now reuse mp, lpat, wpat for the global matchers */
+ MB_METACHARINIT();
while (llen && wlen) {
- int wchr, wmtp;
- char *wp;
+ convchar_t wchr;
+ int wmtp;
+ convchar_t *wp;
Cpattern tmpgenpat;
if (sfx) {
- if (word > oword + 1 && word[-2] == Meta)
- wp = word - 2;
- else
- wp = word - 1;
+ wp = wordcp - 1;
curgenpat--;
} else
- wp = word;
- if (*wp == Meta)
- wchr = STOUC(wp[1]) ^ 32;
- else
- wchr = STOUC(*wp);
- if (pattern_match1(curgenpat, wchr, &wmtp))
+ wp = wordcp;
+ if (pattern_match1(curgenpat, *wp, &wmtp))
{
- int lchr;
+ convchar_t lchr;
/*
* We can match the line character directly with the word
* character. If the line character is a fixed one,
@@ -1749,36 +1836,27 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
lchr = curgenpat->u.chr;
else
lchr = wchr;
- if (imeta(lchr)) {
- if (sfx)
- line -= 2;
- line[0] = Meta;
- line[1] = lchr ^ 32;
- if (!sfx)
- line += 2;
- } else {
- if (sfx)
- line--;
- line[0] = lchr;
- if (!sfx)
- line++;
- }
+
+ if (sfx)
+ *--line = lchr;
+ else
+ *line++ = lchr;
llen--;
wlen--;
rl++;
if (sfx)
- word = wp;
+ wordcp = wp;
else {
if (llen)
curgenpat++;
- word += (*word == Meta) ? 2 : 1;
+ wordcp++;
}
}
else
{
- char *lp;
+ ZLE_CHAR_T *lp;
/*
* Need to loop over pattern matchers.
*/
@@ -1794,66 +1872,31 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
if (mp && !mp->flags && mp->wlen <= wlen &&
mp->llen <= llen)
{
+ lp = line;
+ wp = wordcp;
+ tmpgenpat = curgenpat;
+
if (sfx) {
- /*
- * We haven't assembled the line yet, and with
- * Meta characters we don't yet know the length.
- * We'll fix this up later.
- */
- lp = line - 2 * mp->llen;
- } else
- lp = line;
- wp = word;
- if (sfx) {
- int l = mp->wlen;
- while (l--) {
- if (wp > oword + 1 && wp[-2] == Meta)
- wp -= 2;
- else
- wp--;
- }
+ lp -= mp->llen;
+ wp -= mp->wlen;
+ tmpgenpat -= mp->llen;
+ }
- tmpgenpat = curgenpat - mp->llen;
- } else
- tmpgenpat = curgenpat;
- if (pattern_match_restrict(mp->line, lp,
- mp->word, wp, tmpgenpat)) {
+ if (pattern_match_restrict(mp->line, mp->word, wp,
+ wlen - (wp - wordchars),
+ tmpgenpat, lp)) {
/*
* Matched: advance over as many characters
* of the patterns and strings as
* we've done matches.
*/
if (sfx) {
- int imove = mp->llen, nchar;
- char *pmove = lp;
- word = wp;
-
- /* Close the gap we left in the line string */
- while (imove--)
- pmove += (*pmove == Meta) ? 2 : 1;
- /* Number of bytes to move */
- nchar = (int)(pmove - lp);
- /* The size of the gap */
- imove = 2 * mp->llen - nchar;
- if (imove) {
- lp = line - imove;
- /* Moving up, so start at the top */
- while (nchar--)
- *--line = *--lp;
- /* line is at the start of the moved text */
- }
-
+ line = lp;
+ wordcp = wp;
curgenpat = tmpgenpat;
} else {
- int cnt = mp->llen;
- while (cnt--) {
- line += (*line == Meta) ? 2 : 1;
- }
-
- cnt = mp->wlen;
- while (cnt--)
- word += (*word == Meta) ? 2 : 1;
-
+ line += mp->llen;
+ wordcp += mp->wlen;
curgenpat += mp->llen;
}
llen -= mp->llen;
@@ -1869,10 +1912,6 @@ bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
}
if (!llen) {
/* Unmatched portion in the line built, return matched length. */
- if (sfx)
- *linep = line;
- else
- *line = '\0';
return rl;
}
return 0;
@@ -1891,7 +1930,14 @@ join_strs(int la, char *sa, int lb, char *sb)
Cmlist ms;
Cmatcher mp;
- int t, bl, rr = rl;
+ int t, bl;
+ /** rr is the remaining length already allocated in rs */
+ int rr = rl;
+ /*
+ * convlen is the length we need for the string converted to
+ * char * (possibly multibyte).
+ */
+ int convlen;
char *rp = rs;
while (la && lb) {
@@ -1906,35 +1952,49 @@ join_strs(int la, char *sa, int lb, char *sb)
if ((t = pattern_match(mp->word, sa, NULL, NULL)) ||
pattern_match(mp->word, sb, NULL, NULL)) {
/* It matched one of the strings, t says which one. */
- /* TODO: double to allow Meta, not necessary
- when properly unmetafied */
- VARARR(char, linearr, 2*mp->llen + 1);
- char **ap, **bp, *line = linearr;
+ VARARR(ZLE_CHAR_T, line, mp->llen);
+ char **ap, **bp;
int *alp, *blp;
if (t) {
- ap = &sa; alp = &la;
- bp = &sb; blp = &lb;
+ ap = &sa;
+ alp = &la;
+
+ bp = &sb;
+ blp = &lb;
} else {
- ap = &sb; alp = &lb;
- bp = &sa; blp = &la;
+ ap = &sb;
+ alp = &lb;
+
+ bp = &sa;
+ blp = &la;
}
/* Now try to build a string that matches the other
* string. */
- if ((bl = bld_line(mp, &line, *ap, *bp, *blp, 0))) {
+ if ((bl = bld_line(mp, line, *ap, *bp, *blp, 0))) {
/* Found one, put it into the return string. */
- if (rr <= mp->llen) {
+ char *convstr =
+ zlelineasstring(line, mp->llen, 0, &convlen,
+ NULL, 0);
+ if (rr <= convlen) {
char *or = rs;
+ int alloclen = (convlen > 20) ? convlen : 20;
- rs = realloc(rs, (rl += 20));
- rr += 20;
+ rs = realloc(rs, (rl += alloclen));
+ rr += alloclen;
rp += rs - or;
}
- memcpy(rp, line, mp->llen);
- rp += mp->llen; rr -= mp->llen;
- *ap += mp->wlen; *alp -= mp->wlen;
- *bp += bl; *blp -= bl;
+ memcpy(rp, convstr, convlen);
+ rp += convlen;
+ rr -= convlen;
+ /* HERE: multibyte chars */
+ *ap += mp->wlen;
+ *alp -= mp->wlen;
+
+ *bp += bl;
+ *blp -= bl;
t = 1;
+ free(convstr);
} else
t = 0;
}
@@ -1944,16 +2004,20 @@ join_strs(int la, char *sa, int lb, char *sb)
break;
} else {
/* Same character, just take it. */
- if (rr <= 1) {
+ if (rr <= 1 /* HERE charlen */) {
char *or = rs;
rs = realloc(rs, (rl += 20));
rr += 20;
rp += rs - or;
}
- *rp++ = *sa; rr--;
- sa++; sb++;
- la--; lb--;
+ /* HERE: multibyte char */
+ *rp++ = *sa;
+ rr--;
+ sa++;
+ sb++;
+ la--;
+ lb--;
}
}
if (la || lb)
@@ -2035,9 +2099,11 @@ check_cmdata(Cmdata md, int sfx)
} else {
md->line = 0;
md->len = md->olen = md->cl->wlen;
+ /* HERE: multibyte */
if ((md->str = md->cl->word) && sfx)
md->str += md->len;
md->alen = md->cl->llen;
+ /* HERE: multibyte */
if ((md->astr = md->cl->line) && sfx)
md->astr += md->alen;
}
@@ -2060,9 +2126,11 @@ undo_cmdata(Cmdata md, int sfx)
r->wlen = 0;
r->flags |= CLF_LINE;
r->llen = md->len;
+ /* HERE: multibyte */
r->line = md->str - (sfx ? md->len : 0);
} else if (md->len != md->olen) {
r->wlen = md->len;
+ /* HERE: multibyte */
r->word = md->str - (sfx ? md->len : 0);
DPUTS(r->wlen > 0 && !*r->word, "Bad word");
}
@@ -2116,24 +2184,24 @@ join_sub(Cmdata md, char *str, int len, int *mlen, int sfx, int join)
NULL, NULL)) ||
pattern_match(mp->word, nw - (sfx ? mp->wlen : 0),
NULL, NULL))) {
- /* TODO: doubled to allow Meta, not necessary
- * when properly unmetafied */
- VARARR(char, linearr, 2*mp->llen + 1);
+ VARARR(ZLE_CHAR_T, line, mp->llen);
int bl;
- char *mw, *line = linearr;
+ char *mw;
/* Then build all the possible lines and see
* if one of them matches the other string. */
+ /* HERE: they're multibyte */
if (t)
mw = ow - (sfx ? mp->wlen : 0);
else
mw = nw - (sfx ? mp->wlen : 0);
- if ((bl = bld_line(mp, &line, mw, (t ? nw : ow),
+ if ((bl = bld_line(mp, line, mw, (t ? nw : ow),
(t ? nl : ol), sfx))) {
/* Yep, one of the lines matched the other
* string. */
+ /* HERE: multibyte characters */
if (t) {
ol = mp->wlen; nl = bl;
} else {
@@ -2146,8 +2214,10 @@ join_sub(Cmdata md, char *str, int len, int *mlen, int sfx, int join)
md->len -= nl;
*mlen = ol;
- return get_cline(NULL, 0, dupstring(line), mp->llen,
- NULL, 0, CLF_JOIN);
+ return get_cline(NULL, 0,
+ zlelineasstring(line, mp->llen,
+ 0, NULL, NULL, 1),
+ mp->llen, NULL, 0, CLF_JOIN);
}
}
}
diff --git a/Src/Zle/computil.c b/Src/Zle/computil.c
index 264213ccf..aa5983b94 100644
--- a/Src/Zle/computil.c
+++ b/Src/Zle/computil.c
@@ -4062,7 +4062,7 @@ cfp_matcher_range(Cmatcher *ms, char *add)
len += addlen + 1;
} else {
/* The usual set of matcher possibilities. */
- int ind;
+ convchar_t ind;
if (m->line->tp == CPAT_EQUIV &&
m->word->tp == CPAT_EQUIV) {
/*
@@ -4086,7 +4086,7 @@ cfp_matcher_range(Cmatcher *ms, char *add)
* word pattern.
*/
if ((ind = pattern_match_equivalence
- (m->word, ind, mt, addc)) != -1) {
+ (m->word, ind, mt, addc)) != CHR_INVALID) {
if (ret) {
if (imeta(ind)) {
*p++ = Meta;
diff --git a/Src/Zle/zle_utils.c b/Src/Zle/zle_utils.c
index 4aca1958d..4c02d6013 100644
--- a/Src/Zle/zle_utils.c
+++ b/Src/Zle/zle_utils.c
@@ -167,9 +167,10 @@ zlecharasstring(ZLE_CHAR_T inchar, char *buf)
* instead of wide characters where appropriate and with the contents
* metafied.
*
- * If outll is non-NULL, assign the new length. If outcs is non-NULL,
- * assign the new character position. This is the conventional string
- * length, without the NULL byte.
+ * If outllp is non-NULL, assign the new length. This is the conventional
+ * string length, without the NULL byte.
+ *
+ * If outcsp is non-NULL, assign the new character position.
*
* If useheap is 1, memory is returned from the heap, else is allocated
* for later freeing.
diff --git a/Src/pattern.c b/Src/pattern.c
index 60f70023a..d4941253c 100644
--- a/Src/pattern.c
+++ b/Src/pattern.c
@@ -3344,7 +3344,6 @@ mb_patmatchrange(char *range, wchar_t ch, wint_t *indptr, int *mtp)
}
-#if 0
/*
* This is effectively the reverse of mb_patmatchrange().
* Given a range descriptor of the same form, and an index into it,
@@ -3353,11 +3352,6 @@ mb_patmatchrange(char *range, wchar_t ch, wint_t *indptr, int *mtp)
* return the type in mtp instead. Return 1 if successful, 0 if
* there was no corresponding index. Note all pointer arguments
* must be non-null.
- *
- * TODO: for now the completion matching code does not handle
- * multibyte. When it does, we will need either this, or
- * patmatchindex(), but not both---unlike user-initiated pattern
- * matching, multibyte mode in the line editor is always on when available.
*/
/**/
@@ -3438,10 +3432,9 @@ mb_patmatchindex(char *range, wint_t ind, wint_t *chr, int *mtp)
/* No corresponding index. */
return 0;
}
-#endif
/**/
-#endif
+#endif /* MULTIBYTE_SUPPORT */
/*
* Identical function to mb_patmatchrange() above for single-byte
@@ -3572,9 +3565,17 @@ patmatchrange(char *range, int ch, int *indptr, int *mtp)
return 0;
}
+
+/**/
+#ifndef MULTIBYTE_SUPPORT
+
/*
* Identical function to mb_patmatchindex() above for single-byte
* characters. Here -1 represents a character that needs a special type.
+ *
+ * Unlike patmatchrange, we only need this in ZLE, which always
+ * uses MULTIBYTE_SUPPORT if compiled in; hence we don't use
+ * this function in that case.
*/
/**/
@@ -3658,6 +3659,9 @@ patmatchindex(char *range, int ind, int *chr, int *mtp)
return 0;
}
+/**/
+#endif /* !MULTIBYTE_SUPPORT */
+
/*
* Repeatedly match something simple and say how many times.
* charstart is an array parallel to that starting at patinput