summaryrefslogtreecommitdiff
path: root/Src/Zle/compmatch.c
diff options
context:
space:
mode:
authorPeter Stephenson <pws@users.sourceforge.net>2008-06-08 17:53:53 +0000
committerPeter Stephenson <pws@users.sourceforge.net>2008-06-08 17:53:53 +0000
commitbb68ee8db7971b683fba7dd7bf404186872ba7cf (patch)
treece1f5ebe661c12386675eb3ca98521280d521a0b /Src/Zle/compmatch.c
parent2dcc8627c9f4be547e6e49c76d45bea70c714a71 (diff)
downloadzsh-bb68ee8db7971b683fba7dd7bf404186872ba7cf.tar.gz
zsh-bb68ee8db7971b683fba7dd7bf404186872ba7cf.zip
25138(? mailing list stuck): rewrite of completion matching.
Will one day use multibyte/wide characters, doesn't yet.
Diffstat (limited to 'Src/Zle/compmatch.c')
-rw-r--r--Src/Zle/compmatch.c735
1 files changed, 626 insertions, 109 deletions
diff --git a/Src/Zle/compmatch.c b/Src/Zle/compmatch.c
index bf9d2cb0d..e59443c01 100644
--- a/Src/Zle/compmatch.c
+++ b/Src/Zle/compmatch.c
@@ -30,37 +30,68 @@
#include "complete.mdh"
#include "compmatch.pro"
-/* This compares two cpattern lists and returns non-zero if they are
- * equal. */
+/*
+ * This compares two cpattern lists and returns non-zero if they are
+ * equal (N.B. opposite sense to usual *cmp()).
+ *
+ * The old version of this didn't worry about whether the lists
+ * were the same length. This one does. It's hard to see how
+ * that can be wrong even if it's unnecessary.
+ */
/**/
static int
-cmp_cpatterns(Cpattern a, Cpattern b)
+cpatterns_same(Cpattern a, Cpattern b)
{
while (a) {
- if (a->equiv != b->equiv || memcmp(a->tab, b->tab, 256))
+ if (!b)
+ return 0;
+ if (a->tp != b->tp)
return 0;
+ switch (a->tp) {
+ case CPAT_CCLASS:
+ case CPAT_NCLASS:
+ case CPAT_EQUIV:
+ /*
+ * Patterns can actually match the same even if
+ * the range strings don't compare differently, but
+ * I don't think we need to handle that subtlety.
+ */
+ if (strcmp(a->u.str, b->u.str) != 0)
+ return 0;
+ break;
+
+ case CPAT_CHAR:
+ if (a->u.chr != b->u.chr)
+ return 0;
+ break;
+
+ default:
+ /* here to silence compiler */
+ break;
+ }
+
a = a->next;
b = b->next;
}
- return 1;
+ return !b;
}
/* This compares two cmatchers and returns non-zero if they are equal. */
/**/
static int
-cmp_cmatchers(Cmatcher a, Cmatcher b)
+cmatchers_same(Cmatcher a, Cmatcher b)
{
return (a == b ||
(a->flags == b->flags &&
a->llen == b->llen && a->wlen == b->wlen &&
- (!a->llen || cmp_cpatterns(a->line, b->line)) &&
- (a->wlen <= 0 || cmp_cpatterns(a->word, b->word)) &&
+ (!a->llen || cpatterns_same(a->line, b->line)) &&
+ (a->wlen <= 0 || cpatterns_same(a->word, b->word)) &&
(!(a->flags & (CMF_LEFT | CMF_RIGHT)) ||
(a->lalen == b->lalen && a->ralen == b->ralen &&
- (!a->lalen || cmp_cpatterns(a->left, b->left)) &&
- (!a->ralen || cmp_cpatterns(a->right, b->right))))));
+ (!a->lalen || cpatterns_same(a->left, b->left)) &&
+ (!a->ralen || cpatterns_same(a->right, b->right))))));
}
/* Add the given matchers to the bmatcher list. */
@@ -97,7 +128,7 @@ update_bmatchers(void)
t = 0;
for (ms = mstack; ms && !t; ms = ms->next)
for (mp = ms->matcher; mp && !t; mp = mp->next)
- t = cmp_cmatchers(mp, p->matcher);
+ t = cmatchers_same(mp, p->matcher);
p = p->next;
if (!t) {
@@ -449,7 +480,7 @@ add_match_sub(Cmatcher m, char *l, int ll, char *w, int wl)
}
}
-/* This tests if the string from the line l matches the word w. In bp
+/* This tests if the string from the line l matches the word w. In *bpp
* the offset for the brace is returned, in rwlp the length of the
* matched prefix or suffix, not including the stuff before or after
* the last anchor is given. When sfx is non-zero matching is done from
@@ -1113,55 +1144,330 @@ comp_match(char *pfx, char *sfx, char *w, Patprog cp, Cline *clp, int qu,
return r;
}
-/* Check if the given pattern matches the given string. *
+
+/*
+ * Guts of a single pattern for pattern_match().
+ * Return non-zero if match successful.
+ * If the class was an equivalence, return 1 + the index into
+ * the equivalence class (see pattern.c for how this is calculated).
+ */
+
+/**/
+mod_export int
+pattern_match1(Cpattern p, int c, int *mtp)
+{
+ /* TODO: should become convchar_t */
+ int ind;
+
+ *mtp = 0;
+ switch (p->tp) {
+ case CPAT_CCLASS:
+ return PATMATCHRANGE(p->u.str, CONVCAST(c), NULL, NULL);
+
+ case CPAT_NCLASS:
+ return !PATMATCHRANGE(p->u.str, CONVCAST(c), NULL, NULL);
+
+ case CPAT_EQUIV:
+ if (PATMATCHRANGE(p->u.str, CONVCAST(c), &ind, mtp))
+ return ind + 1;
+ else
+ return 0;
+
+ case CPAT_ANY:
+ return 1;
+
+ case CPAT_CHAR:
+ return (p->u.chr == c);
+
+ default:
+ DPUTS(1, "bad matcher pattern type");
+ return 0;
+ }
+}
+
+
+/*
+ * Use an equivalence to deduce the line character from the word, or
+ * vice versa. (If vice versa, then "line" and "word" are reversed
+ * in what follows. The logic is symmetric.)
+ * lp is the line pattern.
+ * wind is the index returned by a pattern match on the word pattern,
+ * with type wmtp.
+ * wchr is the word character.
+ * Return -1 if no matching character, else the character.
+ *
+ * Only makes sense if lp->tp == CPAT_EQUIV and the (unseen) word
+ * pattern also has that type.
+ */
+static int
+pattern_match_equivalence(Cpattern lp, int wind, int wmtp, int wchr)
+{
+ int lchr, lmtp;
+
+ if (!PATMATCHINDEX(lp->u.str, wind-1, &lchr, &lmtp)) {
+ /*
+ * No equivalent. No possible match; give up.
+ */
+ return -1;
+ }
+ /*
+ * If we matched an exact character rather than a range
+ * type, return it.
+ */
+ if (lchr != -1)
+ return lchr;
+
+ /*
+ * Check the match types. We may want a case-changed
+ * version of the word character.
+ */
+ if (wmtp == PP_UPPER && lmtp == PP_LOWER)
+ return tulower(wchr);
+ else if (wmtp == PP_LOWER && lmtp == PP_UPPER)
+ return tuupper(wchr);
+ else if (wmtp == lmtp) {
+ /*
+ * Be lenient and allow identical replacements
+ * for character classes, although in fact this
+ * doesn't give special functionality for equivalence
+ * classes.
+ */
+ return wchr;
+ } else {
+ /*
+ * Non-matching generic types; this can't work.
+ */
+ return -1;
+ }
+}
+
+/*
+ * Check if the given pattern matches the given string.
* p and s are either anchor or line pattern and string;
* wp and ws are word (candidate) pattern and string
*
- * If only one pattern is given, we just check if characters match
+ * If only one pattern is given, we just check if characters match.
* If both line and word are given, we check that characters match
- * for {...} classes by comparing relative numbers in sequence.
+ * for {...} classes by comparing positions in the strings.
*
* Patterns and strings are always passed in pairs, so it is enough
* to check for non-NULL wp. p should always be present.
+ *
+ * If prestrict is not NULL, it is a chain of patterns at least as long
+ * as the line string. In this case we are still assembling the line at
+ * s (which has been allocated but doesn't yet contain anything useful)
+ * and must continue to do so as we go along; prestrict gives
+ * restrictions on the line character to be applied along side the other
+ * patterns. In the simple case a restriction is a character to be put
+ * in place; otherwise it is a set of possible characters and we have to
+ * deduce an actual matching character. Note prestrict is never an
+ * equivalence class. In extreme cases we can't deduce a unique
+ * character; then the match fails.
*/
/**/
mod_export int
-pattern_match(Cpattern p, char *s, Cpattern wp, char *ws)
+pattern_match_restrict(Cpattern p, char *s, Cpattern wp, char *ws,
+ Cpattern prestrict)
{
- unsigned char c;
- unsigned char wc;
+ int c, ind;
+ int wc, wind;
+ int len, wlen, mt, wmt;
while (p && wp && *s && *ws) {
- c = p->tab[*((unsigned char *) s)];
- wc = wp->tab[*((unsigned char *) ws)];
-
- if (!c || !wc || c != wc)
+ /* First test the word character */
+ if (*ws == Meta) {
+ wc = STOUC(ws[1]) ^ 32;
+ wlen = 2;
+ } else {
+ wc = STOUC(*ws);
+ wlen = 1;
+ }
+ wind = pattern_match1(wp, wc, &wmt);
+ if (!wind)
return 0;
- s++;
- ws++;
+ /*
+ * Now the line character; deal with the case where
+ * we don't yet have it, only a restriction on it.
+ */
+ if (prestrict) {
+ if (prestrict->tp == CPAT_CHAR) {
+ /*
+ * Easy case: restricted to an exact character on
+ * the line. Procede as normal.
+ */
+ c = prestrict->u.chr;
+ } else {
+ if (p->tp == CPAT_CHAR) {
+ /*
+ * Normal line pattern is an exact character: as
+ * long as this matches prestrict, we can proceed
+ * as usual.
+ */
+ c = p->u.chr;
+ } else if (p->tp == CPAT_EQUIV) {
+ /*
+ * An equivalence, so we can deduce the character
+ * backwards from the word pattern and see if it
+ * matches prestrict.
+ */
+ if ((c = pattern_match_equivalence(p, wind, wmt, wc)) == -1)
+ return 0;
+ } else {
+ /*
+ * Not an equivalence, so that means we must match
+ * the word (not just the word pattern), so grab it
+ * and make sure it fulfills our needs. I think.
+ * Not 100% sure about that, but what else can
+ * we do? We haven't actually been passed a string
+ * from the command line.
+ */
+ c = wc;
+ }
+ /* Character so deduced must match the restriction. */
+ if (!pattern_match1(prestrict, c, &mt))
+ return 0;
+ }
+ len = imeta(c) ? 2 : 1;
+ } else {
+ /* We have the character itself. */
+ if (*s == Meta) {
+ c = STOUC(s[1]) ^ 32;
+ len = 2;
+ } else {
+ c = STOUC(*s);
+ len = 1;
+ }
+ }
+ /*
+ * If either is "?", they match each other; no further tests.
+ * Apply this even if the character wasn't convertable;
+ * there's no point trying to be clever in that case.
+ */
+ if (p->tp != CPAT_ANY || wp->tp != CPAT_ANY)
+ {
+ ind = pattern_match1(p, c, &mt);
+ if (!ind)
+ return 0;
+ if (ind != wind)
+ return 0;
+ if (mt != wmt) {
+ /*
+ * Special case if matching lower vs. upper or
+ * vice versa. The transformed characters must match.
+ * We don't need to check the transformation is
+ * the appropriate one for each character separately,
+ * since that was done in pattern_match1(), so just
+ * compare lower-cased versions of both.
+ */
+ if ((mt == PP_LOWER || mt == PP_UPPER) &&
+ (wmt == PP_LOWER || wmt == PP_UPPER)) {
+ if (tulower(c) != tulower(wc))
+ return 0;
+ } else {
+ /* Other different classes can't match. */
+ return 0;
+ }
+ }
+ }
+
+ if (prestrict) {
+ /* We need to assemble the line */
+ if (imeta(c)) {
+ *s++ = Meta;
+ *s++ = c ^ 32;
+ } else {
+ *s++ = c;
+ }
+ prestrict = prestrict->next;
+ } else
+ s += len;
+ ws += wlen;
p = p->next;
wp = wp->next;
}
while (p && *s) {
- if (!p->tab[*((unsigned char *) s)])
+ if (prestrict) {
+ /*
+ * As above, but with even less info to go on.
+ * (Can this happen?) At least handle the cases where
+ * one of our patterns has given us a specific character.
+ */
+ if (prestrict->tp == CPAT_CHAR) {
+ c = prestrict->u.chr;
+ } else {
+ if (p->tp == CPAT_CHAR) {
+ c = p->u.chr;
+ } else {
+ /*
+ * OK. Here we are in a function with just a line
+ * pattern and another pattern to restrict the
+ * characters that can go on the line, and no actual
+ * characters. We're matching two patterns against
+ * one another to generate a character to insert.
+ * This is a bit too psychedelic, so I'm going to
+ * bale out now. See you on the ground.
+ */
+ return 0;
+ }
+ if (!pattern_match1(prestrict, c, &mt))
+ return 0;
+ }
+ } else {
+ if (*s == Meta) {
+ c = STOUC(s[1]) ^ 32;
+ len = 2;
+ } else {
+ c = STOUC(*s);
+ len = 1;
+ }
+ }
+ if (!pattern_match1(p, c, &mt))
return 0;
p = p->next;
- s++;
+ if (prestrict) {
+ if (imeta(c)) {
+ *s++ = Meta;
+ *s++ = c ^ 32;
+ } else {
+ *s++ = c;
+ }
+ prestrict = prestrict->next;
+ } else
+ s += len;
}
while (wp && *ws) {
- if (!wp->tab[*((unsigned char *) ws)])
+ /* No funny business when we only have the word pattern. */
+ if (*ws == Meta) {
+ wc = STOUC(ws[1]) ^ 32;
+ wlen = 2;
+ } else {
+ wc = STOUC(*ws);
+ wlen = 1;
+ }
+ if (!pattern_match1(wp, wc, &wmt))
return 0;
wp = wp->next;
- ws++;
+ ws += wlen;
}
return 1;
}
+/*
+ * The usual version of pattern matching, without the line string
+ * being handled by restriction.
+ */
+/**/
+mod_export int
+pattern_match(Cpattern p, char *s, Cpattern wp, char *ws)
+{
+ return pattern_match_restrict(p, s, wp, ws, NULL);
+}
+
/* This splits the given string into a list of cline structs, separated
* at those places where one of the anchors of an `*' pattern was found.
* plen gives the number of characters on the line that matched this
@@ -1256,11 +1562,11 @@ bld_parts(char *str, int len, int plen, Cline *lp, Cline *lprem)
return ret;
}
-/* This builds all the possible line patterns for the pattern pat in the
- * buffer line. Initially line is the same as lp, but during recursive
- * calls lp is incremented for storing successive characters. Whenever
- * a full possible string is build, we test if this line matches the
- * string given by wlen and word.
+
+/*
+ * This builds all the possible line patterns for the pattern pat in the
+ * buffer line. Then we test if this line matches the string given by
+ * wlen and word.
*
* wpat contains pattern that matched previously
* lpat contains the pattern for line we build
@@ -1269,91 +1575,297 @@ bld_parts(char *str, int len, int plen, Cline *lp, Cline *lprem)
*
* The return value is the length of the string matched in the word, it
* is zero if we couldn't build a line that matches the word.
+ *
+ * TODO: a lot of the nastiness associated with variable string
+ * lengths can go when we switch to wide characters. (Why didn't
+ * I just keep line unmetafied and metafy into place at the end? Er...)
*/
-
/**/
static int
-bld_line(Cpattern wpat, Cpattern lpat, char *line, char *lp,
- char *mword, char *word, int wlen, int sfx)
+bld_line(Cmatcher mp, char **linep, char *mword, char *word, int wlen, int sfx)
{
- if (lpat) {
- /* Still working on the pattern. */
-
- int i, l;
- unsigned char c = 0;
-
- /* Get the number of the character for a correspondence class
- * if it has a corresponding class. */
- if (lpat->equiv)
- if (wpat && *mword) {
- c = wpat->tab[STOUC(*mword)];
- wpat = wpat->next;
- mword++;
+ Cpattern lpat = mp->line;
+ Cpattern wpat = mp->word;
+ Cpattern curgenpat;
+ VARARR(struct cpattern, genpatarr, mp->llen);
+ Cmlist ms;
+ int llen, rl;
+ char *oword = word, *line = *linep;
+
+ /*
+ * Loop over all characters. At this stage, line is an empty
+ * space of length llen (not counting the null byte) which we assemble as
+ * we go along.
+ *
+ * However, first we need to know what characters can appear at each
+ * point in the line. For this we assemble an list genpatarr of the
+ * same length as the line. (It's convenient to store this as an
+ * array but it's linked as a list, too.) If there are equivalences
+ * we use mword to derive the equivalent character; when we've
+ * reached the end of mword, equivalences are treated just like
+ * ordinary character classes. For character classes we just attach
+ * the class to the genpatarr list and apply it as a restriction
+ * when we finally match the line against the set of matchers.
+ */
+ curgenpat = genpatarr;
+ while (lpat) {
+ int wchr = (*mword == Meta) ? STOUC(mword[1]) ^ 32 : STOUC(*mword);
+ int wmtp, wind;
+ /*
+ * If the line pattern is an equivalence, query wpat to find the
+ * word part of the equivalence. If we don't find one we don't try
+ * equivalencing but use lpat as an ordinary match. (It's not
+ * entirely clear to me this is the correct behaviour on a
+ * failed character match within the equivalence, but that was
+ * the behaviour of the old logic that this replaces.)
+ */
+ if (lpat->tp == CPAT_EQUIV && wpat && *mword) {
+ wind = pattern_match1(wpat, wchr, &wmtp);
+ wpat = wpat->next;
+ mword += (*mword == Meta) ? 2 : 1;
+ } else
+ wind = 0;
+ if (wind) {
+ /*
+ * Successful match for word side of equivalence.
+ * Find the line equivalent.
+ */
+ int lchr;
+ if ((lchr = pattern_match_equivalence(lpat, wind, wmtp, wchr))
+ == -1) {
+ /*
+ * No equivalent. No possible match; give up.
+ */
+ return 0;
+ }
+ /*
+ * We now have an exact character to match,
+ * so make up a pattern element for it.
+ */
+ curgenpat->tp = CPAT_CHAR;
+ curgenpat->u.chr = lchr;
+ } else {
+ /*
+ * Not an equivalence class, so we just keep the
+ * test in the lpat as it is.
+ */
+ curgenpat->tp = lpat->tp;
+ if (lpat->tp == CPAT_CHAR)
+ curgenpat->u.chr = lpat->u.chr;
+ else if (lpat->tp != CPAT_ANY) {
+ /*
+ * The string isn't modified and is only needed in calls from
+ * this function, so we don't even need to copy it.
+ */
+ curgenpat->u.str = lpat->u.str;
}
+ }
+ lpat = lpat->next;
+ /*
+ * This linked list is defined above as an array.
+ * We could get away with just keeping it as an array
+ * and passing it down as such, but that's a bit icky
+ * since the generic linkage of Cpatterns is as a linked
+ * list and we should keep our local memory management
+ * problems to ourselvess.
+ */
+ if (lpat)
+ curgenpat->next = curgenpat+1;
+ else
+ curgenpat->next = NULL;
+ curgenpat++;
+ }
+ /*
+ * We now know how to match the word with the line patterns; let's
+ * see if it does. We will use the information in curgenpat if we
+ * are successful to work out what character goes on the line. This
+ * is a bit hairy, as in "the Yeti is a creature that is a bit
+ * hairy".
+ */
+ llen = mp->llen;
+ rl = 0;
- /* Walk through the table in the pattern and try the characters
- * that may appear in the current position. */
- for (i = 0; i < 256; i++)
- if ((lpat->equiv && c) ? (c == lpat->tab[i]) : lpat->tab[i]) {
- *lp = i;
- /* We stored the character, now call ourselves to build
- * the rest. */
- if ((l = bld_line(wpat, lpat->next, line, lp + 1,
- mword, word, wlen, sfx)))
- return l;
- }
- } else {
- /* We reached the end, i.e. the line string is fully build, now
- * see if it matches the given word. */
+ if (sfx)
+ {
+ /*
+ * We need to work backwards from the end of both the
+ * word and the line strings.
+ *
+ * Position at the end of the word by counting characters.
+ */
+ int l = wlen;
+ while (l--)
+ word += (*word == Meta) ? 2 : 1;
- Cmlist ms;
- Cmatcher mp;
- int l = lp - line, t, rl = 0, ind, add;
+ /*
+ * We construct the line from the end. We've left
+ * enough space for possible Meta's.
+ */
+ line += 2 * llen;
+ *line = '\0';
+ curgenpat = genpatarr + llen;
+ } else
+ curgenpat = genpatarr;
- /* Quick test if the strings are exactly the same. */
- if (l == wlen && !strncmp(line, word, l))
- return l;
+ /* we now reuse mp, lpat, wpat for the global matchers */
+ while (llen && wlen) {
+ int wchr, wmtp;
+ char *wp;
+ Cpattern tmpgenpat;
if (sfx) {
- line = lp; word += wlen;
- ind = -1; add = -1;
- } else {
- ind = 0; add = 1;
- }
- /* We loop through the whole line string built. */
- while (l && wlen) {
- if (word[ind] == line[ind]) {
- /* The same character in both strings, skip over. */
- line += add; word += add;
- l--; wlen--; rl++;
+ if (word > oword + 1 && word[-2] == Meta)
+ wp = word - 2;
+ else
+ wp = word - 1;
+ curgenpat--;
+ } else
+ wp = word;
+ if (*wp == Meta)
+ wchr = STOUC(wp[1]) ^ 32;
+ else
+ wchr = STOUC(*wp);
+ if (pattern_match1(curgenpat, wchr, &wmtp))
+ {
+ int lchr;
+ /*
+ * We can match the line character directly with the word
+ * character. If the line character is a fixed one,
+ * keep it, since we went to all that trouble above,
+ * else if it's generic, keep the word character,
+ * since we have no choice.
+ */
+ if (curgenpat->tp == CPAT_CHAR)
+ lchr = curgenpat->u.chr;
+ else
+ lchr = wchr;
+ if (imeta(lchr)) {
+ if (sfx)
+ line -= 2;
+ line[0] = Meta;
+ line[1] = lchr ^ 32;
+ if (!sfx)
+ line += 2;
} else {
- t = 0;
- for (ms = bmatchers; ms && !t; ms = ms->next) {
- mp = ms->matcher;
- if (mp && !mp->flags && mp->wlen <= wlen && mp->llen <= l &&
- pattern_match(mp->line, (sfx ? line - mp->llen : line),
- mp->word, (sfx ? word - mp->wlen : word))) {
- /* Both the line and the word pattern matched,
- * now skip over the matched portions. */
+ if (sfx)
+ line--;
+ line[0] = lchr;
+ if (!sfx)
+ line++;
+ }
+
+ llen--;
+ wlen--;
+ rl++;
+
+ if (sfx)
+ word = wp;
+ else {
+ if (llen)
+ curgenpat++;
+ word += (*word == Meta) ? 2 : 1;
+ }
+ }
+ else
+ {
+ char *lp;
+ /*
+ * Need to loop over pattern matchers.
+ */
+ for (ms = bmatchers; ms; ms = ms->next) {
+ mp = ms->matcher;
+ /*
+ * This is the nightmare case: we have line and
+ * and word matchers and some pattern which restricts
+ * the value on the line without us knowing exactly
+ * what it is. Despatch to the special function
+ * for that.
+ */
+ if (mp && !mp->flags && mp->wlen <= wlen &&
+ mp->llen <= llen)
+ {
+ if (sfx) {
+ /*
+ * We haven't assembled the line yet, and with
+ * Meta characters we don't yet know the length.
+ * We'll fix this up later.
+ */
+ lp = line - 2 * mp->llen;
+ } else
+ lp = line;
+ wp = word;
+ if (sfx) {
+ int l = mp->wlen;
+ while (l--) {
+ if (wp > oword + 1 && wp[-2] == Meta)
+ wp -= 2;
+ else
+ wp--;
+ }
+
+ tmpgenpat = curgenpat - mp->llen;
+ } else
+ tmpgenpat = curgenpat;
+ if (pattern_match_restrict(mp->line, lp,
+ mp->word, wp, tmpgenpat)) {
+ /*
+ * Matched: advance over as many characters
+ * of the patterns and strings as
+ * we've done matches.
+ */
if (sfx) {
- line -= mp->llen; word -= mp->wlen;
+ int imove = mp->llen, nchar;
+ char *pmove = lp;
+ word = wp;
+
+ /* Close the gap we left in the line string */
+ while (imove--)
+ pmove += (*pmove == Meta) ? 2 : 1;
+ /* Number of bytes to move */
+ nchar = (int)(pmove - lp);
+ /* The size of the gap */
+ imove = 2 * mp->llen - nchar;
+ if (imove) {
+ lp = line - imove;
+ /* Moving up, so start at the top */
+ while (nchar--)
+ *--line = *--lp;
+ /* line is at the start of the moved text */
+ }
+
+ curgenpat = tmpgenpat;
} else {
- line += mp->llen; word += mp->wlen;
+ int cnt = mp->llen;
+ while (cnt--) {
+ line += (*line == Meta) ? 2 : 1;
+ }
+
+ cnt = mp->wlen;
+ while (cnt--)
+ word += (*word == Meta) ? 2 : 1;
+
+ curgenpat += mp->llen;
}
- l -= mp->llen; wlen -= mp->wlen; rl += mp->wlen;
- t = 1;
+ llen -= mp->llen;
+ wlen -= mp->wlen;
+ rl += mp->wlen;
+ break;
}
}
- if (!t)
- /* Didn't match, give up. */
- return 0;
}
+ if (!ms)
+ return 0; /* Didn't match, give up */
}
- if (!l)
- /* Unmatched portion in the line built, return matched length. */
- return rl;
+ }
+ if (!llen) {
+ /* Unmatched portion in the line built, return matched length. */
+ if (sfx)
+ *linep = line;
+ else
+ *line = '\0';
+ return rl;
}
return 0;
}
@@ -1386,8 +1898,10 @@ join_strs(int la, char *sa, int lb, char *sb)
if ((t = pattern_match(mp->word, sa, NULL, NULL)) ||
pattern_match(mp->word, sb, NULL, NULL)) {
/* It matched one of the strings, t says which one. */
- VARARR(char, line, mp->llen + 1);
- char **ap, **bp;
+ /* TODO: double to allow Meta, not necessary
+ when properly unmetafied */
+ VARARR(char, linearr, 2*mp->llen + 1);
+ char **ap, **bp, *line = linearr;
int *alp, *blp;
if (t) {
@@ -1399,10 +1913,8 @@ join_strs(int la, char *sa, int lb, char *sb)
}
/* Now try to build a string that matches the other
* string. */
- if ((bl = bld_line(mp->word, mp->line, line, line,
- *ap, *bp, *blp, 0))) {
+ if ((bl = bld_line(mp, &line, *ap, *bp, *blp, 0))) {
/* Found one, put it into the return string. */
- line[mp->llen] = '\0';
if (rr <= mp->llen) {
char *or = rs;
@@ -1444,7 +1956,11 @@ join_strs(int la, char *sa, int lb, char *sb)
return rs;
}
-/* This compares the anchors stored in two top-level clines. */
+/*
+ * This compares the anchors stored in two top-level clines.
+ * It returns 1 if the anchors are the same, 2 if they are
+ * compatible (and have been combined in "o"), 0 otherwise.
+ */
/**/
static int
@@ -1591,9 +2107,11 @@ join_sub(Cmdata md, char *str, int len, int *mlen, int sfx, int join)
NULL, NULL)) ||
pattern_match(mp->word, nw - (sfx ? mp->wlen : 0),
NULL, NULL))) {
- VARARR(char, line, mp->llen + 1);
+ /* TODO: doubled to allow Meta, not necessary
+ * when properly unmetafied */
+ VARARR(char, linearr, 2*mp->llen + 1);
int bl;
- char *mw;
+ char *mw, *line = linearr;
/* Then build all the possible lines and see
* if one of them matches the other string. */
@@ -1602,11 +2120,10 @@ join_sub(Cmdata md, char *str, int len, int *mlen, int sfx, int join)
else
mw = nw - (sfx ? mp->wlen : 0);
- if ((bl = bld_line(mp->word, mp->line, line, line,
- mw, (t ? nw : ow), (t ? nl : ol), sfx))) {
+ if ((bl = bld_line(mp, &line, mw, (t ? nw : ow),
+ (t ? nl : ol), sfx))) {
/* Yep, one of the lines matched the other
* string. */
- line[mp->llen] = '\0';
if (t) {
ol = mp->wlen; nl = bl;