From 7154b8314d8b689c1a647b594044d3b735ef3725 Mon Sep 17 00:00:00 2001 From: Daniel Shahaf Date: Thu, 3 Dec 2015 23:31:49 +0000 Subject: 37296: Avoid needless calloc(). --- Src/utils.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'Src/utils.c') diff --git a/Src/utils.c b/Src/utils.c index 464097034..ca810decc 100644 --- a/Src/utils.c +++ b/Src/utils.c @@ -2861,11 +2861,12 @@ spckword(char **s, int hist, int cmd, int ask) if (strncmp(guess, best, preflen)) return; /* replace the temporarily expanded prefix with the original */ - u = (char *) hcalloc(t - *s + strlen(best + preflen) + 1); + u = (char *) zhalloc(t - *s + strlen(best + preflen) + 1); strncpy(u, *s, t - *s); strcpy(u + (t - *s), best + preflen); } else { - u = (char *) hcalloc(strlen(best) + 2); + u = (char *) zhalloc(strlen(best) + 2); + *u = '\0'; strcpy(u + 1, best); } best = u; @@ -3204,7 +3205,7 @@ zjoin(char **arr, int delim, int heap) len += strlen(*s) + 1 + (imeta(delim) ? 1 : 0); if (!len) return heap? "" : ztrdup(""); - ptr = ret = (heap ? (char *) hcalloc(len) : (char *) zshcalloc(len)); + ptr = ret = (char *) (heap ? zhalloc(len) : zalloc(len)); for (s = arr; *s; s++) { strucpy(&ptr, *s); if (imeta(delim)) { @@ -3290,7 +3291,8 @@ spacesplit(char *s, int allownull, int heap, int quote) int l = sizeof(*ret) * (wordcount(s, NULL, -!allownull) + 1); char *(*dup)(const char *) = (heap ? dupstring : ztrdup); - ptr = ret = (heap ? (char **) hcalloc(l) : (char **) zshcalloc(l)); + /* ### TODO: s/calloc/alloc/ */ + ptr = ret = (char **) (heap ? hcalloc(l) : zshcalloc(l)); if (quote) { /* @@ -3320,8 +3322,8 @@ spacesplit(char *s, int allownull, int heap, int quote) t = s; (void)findsep(&s, NULL, quote); if (s > t || allownull) { - *ptr = (heap ? (char *) hcalloc((s - t) + 1) : - (char *) zshcalloc((s - t) + 1)); + *ptr = (char *) (heap ? zhalloc((s - t) + 1) : + zalloc((s - t) + 1)); ztrncpy(*ptr++, t, s - t); } else *ptr++ = dup(nulstring); @@ -3511,7 +3513,7 @@ sepjoin(char **s, char *sep, int heap) } sl = strlen(sep); for (t = s, l = 1 - sl; *t; l += strlen(*t) + sl, t++); - r = p = (heap ? (char *) hcalloc(l) : (char *) zshcalloc(l)); + r = p = (char *) (heap ? zhalloc(l) : zalloc(l)); t = s; while (*t) { strucpy(&p, *t); @@ -3538,14 +3540,14 @@ sepsplit(char *s, char *sep, int allownull, int heap) sl = strlen(sep); n = wordcount(s, sep, 1); - r = p = (heap ? (char **) hcalloc((n + 1) * sizeof(char *)) : - (char **) zshcalloc((n + 1) * sizeof(char *))); + r = p = (char **) (heap ? zhalloc((n + 1) * sizeof(char *)) : + zalloc((n + 1) * sizeof(char *))); for (t = s; n--;) { tt = t; (void)findsep(&t, sep, 0); - *p = (heap ? (char *) hcalloc(t - tt + 1) : - (char *) zshcalloc(t - tt + 1)); + *p = (char *) (heap ? zhalloc(t - tt + 1) : + zalloc(t - tt + 1)); strncpy(*p, tt, t - tt); (*p)[t - tt] = '\0'; p++; -- cgit v1.2.3 From 5a76a5d34e75219198d012beed06047fd98c7cae Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Sun, 6 Dec 2015 15:33:02 +0000 Subject: 37314: upgrade quotedzputs() for non-printable output. Use nicechar with $'..' quoting; upgrade nicechar() etc. to use suitable output. --- ChangeLog | 6 +++ Src/utils.c | 171 ++++++++++++++++++++++++++++++++++++++++++++++++++---------- Src/zsh.h | 6 +++ 3 files changed, 156 insertions(+), 27 deletions(-) (limited to 'Src/utils.c') diff --git a/ChangeLog b/ChangeLog index eb25e08d3..aab22b0c9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2015-12-06 Peter Stephenson + + * 37314: Src/utils.c, Src/zsh.h: upgrade quotedzputs() to use + nicechar() where necessary. Regularise nicechar() etc. to prodduce + $'...' compatible output. + 2015-12-05 Daniel Shahaf * 37150: Completion/Unix/Command/_git: _git: Autocomplete .. for diff --git a/Src/utils.c b/Src/utils.c index ca810decc..d1313837d 100644 --- a/Src/utils.c +++ b/Src/utils.c @@ -411,7 +411,7 @@ putshout(int c) mod_export char * nicechar(int c) { - static char buf[6]; + static char buf[10]; char *s = buf; c &= 0xff; if (ZISPRINT(c)) @@ -427,7 +427,9 @@ nicechar(int c) goto done; } if (c == 0x7f) { - *s++ = '^'; + *s++ = '\\'; + *s++ = 'C'; + *s++ = '-'; c = '?'; } else if (c == '\n') { *s++ = '\\'; @@ -436,7 +438,9 @@ nicechar(int c) *s++ = '\\'; c = 't'; } else if (c < 0x20) { - *s++ = '^'; + *s++ = '\\'; + *s++ = 'C'; + *s++ = '-'; c += 0x40; } done: @@ -455,6 +459,22 @@ nicechar(int c) return buf; } +/* + * Return 1 if nicechar() would reformat this character. + */ + +/**/ +mod_export int +is_nicechar(int c) +{ + c &= 0xff; + if (ZISPRINT(c)) + return 0; + if (c & 0x80) + return !isset(PRINTEIGHTBIT); + return (c == 0x7f || c == '\n' || c == '\t' || c < 0x20); +} + /**/ #ifdef MULTIBYTE_SUPPORT static mbstate_t mb_shiftstate; @@ -532,7 +552,9 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep) s = buf; if (!iswprint(c) && (c < 0x80 || !isset(PRINTEIGHTBIT))) { if (c == 0x7f) { - *s++ = '^'; + *s++ = '\\'; + *s++ = 'C'; + *s++ = '-'; c = '?'; } else if (c == L'\n') { *s++ = '\\'; @@ -541,7 +563,9 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep) *s++ = '\\'; c = 't'; } else if (c < 0x20) { - *s++ = '^'; + *s++ = '\\'; + *s++ = 'C'; + *s++ = '-'; c += 0x40; } else if (c >= 0x80) { ret = -1; @@ -611,6 +635,23 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep) return buf; } +/* + * Return 1 if wcs_nicechar() would reformat this character for display. + */ + +/**/ +mod_export int is_wcs_nicechar(wchar_t c) +{ + if (!iswprint(c) && (c < 0x80 || !isset(PRINTEIGHTBIT))) { + if (c == 0x7f || c == L'\n' || c == L'\t' || c < 0x20) + return 1; + if (c >= 0x80) { + return (c >= 0x100); + } + } + return 0; +} + /**/ mod_export int zwcwidth(wint_t wc) @@ -4834,12 +4875,15 @@ niceztrlen(char const *s) * If outstrp is not NULL, set *outstrp to a zalloc'd version of * the output (still metafied). * - * If "heap" is non-zero, use the heap for *outstrp, else zalloc. + * If flags contains NICEFLAG_HEAP, use the heap for *outstrp, else + * zalloc. + * If flags contsins NICEFLAG_QUOTE, the output is going to be within + * $'...', so quote "'" with a backslash. */ /**/ mod_export size_t -mb_niceformat(const char *s, FILE *stream, char **outstrp, int heap) +mb_niceformat(const char *s, FILE *stream, char **outstrp, int flags) { size_t l = 0, newl; int umlen, outalloc, outleft, eol = 0; @@ -4886,7 +4930,10 @@ mb_niceformat(const char *s, FILE *stream, char **outstrp, int heap) cnt = 1; /* FALL THROUGH */ default: - fmt = wcs_nicechar(c, &newl, NULL); + if (c == L'\'' && (flags & NICEFLAG_QUOTE)) + fmt = "\\'"; + else + fmt = wcs_nicechar(c, &newl, NULL); break; } @@ -4920,13 +4967,71 @@ mb_niceformat(const char *s, FILE *stream, char **outstrp, int heap) if (outstrp) { *outptr = '\0'; /* Use more efficient storage for returned string */ - *outstrp = heap ? dupstring(outstr) : ztrdup(outstr); + *outstrp = (flags & NICEFLAG_HEAP) ? dupstring(outstr) : ztrdup(outstr); free(outstr); } return l; } +/* + * Return 1 if mb_niceformat() would reformat this string, else 0. + */ + +/**/ +mod_export int +is_mb_niceformat(const char *s) +{ + int umlen, eol = 0, ret = 0; + wchar_t c; + char *ums, *ptr; + mbstate_t mbs; + + ums = ztrdup(s); + untokenize(ums); + ptr = unmetafy(ums, ¨en); + + memset(&mbs, 0, sizeof mbs); + while (umlen > 0) { + size_t cnt = eol ? MB_INVALID : mbrtowc(&c, ptr, umlen, &mbs); + + switch (cnt) { + case MB_INCOMPLETE: + eol = 1; + /* FALL THROUGH */ + case MB_INVALID: + /* The byte didn't convert, so output it as a \M-... sequence. */ + if (is_nicechar(*ptr)) { + ret = 1; + break; + } + cnt = 1; + /* Get mbs out of its undefined state. */ + memset(&mbs, 0, sizeof mbs); + break; + case 0: + /* Careful: converting '\0' returns 0, but a '\0' is a + * real character for us, so we should consume 1 byte. */ + cnt = 1; + /* FALL THROUGH */ + default: + if (is_wcs_nicechar(c)) + ret = 1; + break; + } + + if (ret) + break; + + umlen -= cnt; + ptr += cnt; + } + + free(ums); + + return ret; +} + /* ztrdup multibyte string with nice formatting */ /**/ @@ -4935,7 +5040,7 @@ nicedup(const char *s, int heap) { char *retstr; - (void)mb_niceformat(s, NULL, &retstr, heap); + (void)mb_niceformat(s, NULL, &retstr, heap ? NICEFLAG_HEAP : 0); return retstr; } @@ -5717,22 +5822,35 @@ quotestring(const char *s, char **e, int instring) /* Unmetafy and output a string, quoted if it contains special characters. */ /**/ -mod_export int +mod_export void quotedzputs(char const *s, FILE *stream) { int inquote = 0, c; /* check for empty string */ - if(!*s) - return fputs("''", stream); + if(!*s) { + fputs("''", stream); + return; + } - if (!hasspecial(s)) - return zputs(s, stream); +#ifdef MULTIBYTE_SUPPORT + if (is_mb_niceformat(s)) { + fputs("$'", stream); + mb_niceformat(s, stream, NULL, NICEFLAG_QUOTE); + fputc('\'', stream); + return; + } +#endif /* MULTIBYTE_SUPPORT */ + + if (!hasspecial(s)) { + zputs(s, stream); + return; + } if (isset(RCQUOTES)) { /* use rc-style quotes-within-quotes for the whole string */ if(fputc('\'', stream) < 0) - return EOF; + return; while(*s) { if (*s == Meta) c = *++s ^ 32; @@ -5741,16 +5859,16 @@ quotedzputs(char const *s, FILE *stream) s++; if (c == '\'') { if(fputc('\'', stream) < 0) - return EOF; + return; } else if(c == '\n' && isset(CSHJUNKIEQUOTES)) { if(fputc('\\', stream) < 0) - return EOF; + return; } if(fputc(c, stream) < 0) - return EOF; + return; } if(fputc('\'', stream) < 0) - return EOF; + return; } else { /* use Bourne-style quoting, avoiding empty quoted strings */ while(*s) { @@ -5762,31 +5880,30 @@ quotedzputs(char const *s, FILE *stream) if (c == '\'') { if(inquote) { if(fputc('\'', stream) < 0) - return EOF; + return; inquote=0; } if(fputs("\\'", stream) < 0) - return EOF; + return; } else { if (!inquote) { if(fputc('\'', stream) < 0) - return EOF; + return; inquote=1; } if(c == '\n' && isset(CSHJUNKIEQUOTES)) { if(fputc('\\', stream) < 0) - return EOF; + return; } if(fputc(c, stream) < 0) - return EOF; + return; } } if (inquote) { if(fputc('\'', stream) < 0) - return EOF; + return; } } - return 0; } /* Double-quote a metafied string. */ diff --git a/Src/zsh.h b/Src/zsh.h index d3bfcefcc..caf7def06 100644 --- a/Src/zsh.h +++ b/Src/zsh.h @@ -3051,6 +3051,12 @@ enum { #define AFTERTRAPHOOK (zshhooks + 2) #ifdef MULTIBYTE_SUPPORT +/* Final argument to mb_niceformat() */ +enum { + NICEFLAG_HEAP = 1, /* Heap allocation where needed */ + NICEFLAG_QUOTE = 2, /* Result will appear in $'...' */ +}; + /* Metafied input */ #define nicezputs(str, outs) (void)mb_niceformat((str), (outs), NULL, 0) #define MB_METACHARINIT() mb_charinit() -- cgit v1.2.3 From 87670fb011b638e5bde744a33a047305d17d95c2 Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Sun, 6 Dec 2015 16:39:57 +0000 Subject: 37326: comment only: multibyte string lenght/width counting --- ChangeLog | 3 +++ Src/utils.c | 11 +++++++++++ 2 files changed, 14 insertions(+) (limited to 'Src/utils.c') diff --git a/ChangeLog b/ChangeLog index aab22b0c9..2c7b5ad94 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ 2015-12-06 Peter Stephenson + * 37326: Src/utils.c: comment how counting for multibyte string + lengths and widths is supposed to work. + * 37314: Src/utils.c, Src/zsh.h: upgrade quotedzputs() to use nicechar() where necessary. Regularise nicechar() etc. to prodduce $'...' compatible output. diff --git a/Src/utils.c b/Src/utils.c index d1313837d..45f8286df 100644 --- a/Src/utils.c +++ b/Src/utils.c @@ -5179,6 +5179,17 @@ mb_metastrlenend(char *ptr, int width, char *eptr) ret = mbrtowc(&wc, &inchar, 1, &mb_shiftstate); if (ret == MB_INCOMPLETE) { + /* + * "num_in_char" is only used for incomplete characters. The + * assumption is that we will output this ocatet as a single + * character (of single width) if we don't get a complete + * character; if we do get a complete character, num_in_char + * becomes irrelevant and is set to zero. + * + * This is in contrast to "num" which counts the characters + * or widths in complete characters. The two are summed, + * so we don't count characters twice. + */ num_in_char++; } else { if (ret == MB_INVALID) { -- cgit v1.2.3 From 9fb30cf572f7c5ff26e883396a5020da89d9541a Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Mon, 7 Dec 2015 18:54:03 +0000 Subject: 37331: Use a single chracter to represent an MB_INCOMPLETE. This is as it is likely to appear as a single character in output even if it has multiple octets. --- ChangeLog | 5 +++++ Src/utils.c | 16 ++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'Src/utils.c') diff --git a/ChangeLog b/ChangeLog index f5b0f2ae2..55a917068 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2015-12-07 Peter Stephenson + + * 37331: Src/utils.c: use a single character to represent an + MB_INCOMPLETE string even if multiple octets. + 2015-12-07 Peter Stephenson * 37337: Src/Zle/zle.h, Src/Zle/zle_main.c, diff --git a/Src/utils.c b/Src/utils.c index 45f8286df..fc2b1920e 100644 --- a/Src/utils.c +++ b/Src/utils.c @@ -5180,11 +5180,15 @@ mb_metastrlenend(char *ptr, int width, char *eptr) if (ret == MB_INCOMPLETE) { /* - * "num_in_char" is only used for incomplete characters. The - * assumption is that we will output this ocatet as a single + * "num_in_char" is only used for incomplete characters. + * The assumption is that we will output all trailing octets + * that form part of an incomplete character as a single * character (of single width) if we don't get a complete - * character; if we do get a complete character, num_in_char - * becomes irrelevant and is set to zero. + * character. This is purely pragmatic --- I'm not aware + * of a standard way of dealing with incomplete characters. + * + * If we do get a complete character, num_in_char + * becomes irrelevant and is set to zero * * This is in contrast to "num" which counts the characters * or widths in complete characters. The two are summed, @@ -5216,8 +5220,8 @@ mb_metastrlenend(char *ptr, int width, char *eptr) } } - /* If incomplete, treat remainder as trailing single bytes */ - return num + num_in_char; + /* If incomplete, treat remainder as trailing single character */ + return num + (num_in_char ? 1 : 0); } /* -- cgit v1.2.3 From f5b8efa7e0450450a91c8263733502f9de6729b4 Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Mon, 7 Dec 2015 21:49:07 +0000 Subject: 37344: restore old printable quoting, add ${(q+)...}. The \C- form is only used inside quotedzputs(). ${(q+)...} outputs a quotedzputs() representation. --- ChangeLog | 5 + Doc/Zsh/expn.yo | 5 + Src/subst.c | 14 ++- Src/utils.c | 264 ++++++++++++++++++++++++++++++++++++++++--------- Src/zsh.h | 8 +- Test/D04parameter.ztst | 2 +- Test/V09datetime.ztst | 2 +- 7 files changed, 247 insertions(+), 53 deletions(-) (limited to 'Src/utils.c') diff --git a/ChangeLog b/ChangeLog index 55a917068..7906ba40c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2015-12-07 Peter Stephenson + * 37344: Doc/Zsh/expn.yo, Src/subst.c, Src/utils.c, Src/zsh.h, + Test/D04parameter.ztst, Test/V09datetime.ztst: restore old + printable quoting of characters when not used from quotedzputs() + and add ${(q+)...} to output the new form. + * 37331: Src/utils.c: use a single character to represent an MB_INCOMPLETE string even if multiple octets. diff --git a/Doc/Zsh/expn.yo b/Doc/Zsh/expn.yo index 564c70dd1..c6e7b6f16 100644 --- a/Doc/Zsh/expn.yo +++ b/Doc/Zsh/expn.yo @@ -1067,6 +1067,11 @@ If a tt(q-) is given (only a single tt(q) may appear), a minimal form of single quoting is used that only quotes the string if needed to protect special characters. Typically this form gives the most readable output. + +If a tt(q+) is given, an extended form of minmal quoting is used that +causes unprintable characters to be rendered using tt($')var(...)tt('). +This quoting is similar to that used by the output of values by the +tt(typeset) family of commands. ) item(tt(Q))( Remove one level of quotes from the resulting words. diff --git a/Src/subst.c b/Src/subst.c index d9c9d24aa..bb1dd8939 100644 --- a/Src/subst.c +++ b/Src/subst.c @@ -1887,12 +1887,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, if (quotetype == QT_DOLLARS || quotetype == QT_BACKSLASH_PATTERN) goto flagerr; - if (s[1] == '-') { + if (s[1] == '-' || s[1] == '+') { if (quotemod) goto flagerr; s++; quotemod = 1; - quotetype = QT_SINGLE_OPTIONAL; + quotetype = (*s == '-') ? QT_SINGLE_OPTIONAL : + QT_QUOTEDZPUTS; } else { if (quotetype == QT_SINGLE_OPTIONAL) { /* extra q's after '-' not allowed */ @@ -3583,7 +3584,10 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, ap = aval; if (quotemod > 0) { - if (quotetype > QT_BACKSLASH) { + if (quotetype == QT_QUOTEDZPUTS) { + for (; *ap; ap++) + *ap = quotedzputs(*ap, NULL); + } else if (quotetype > QT_BACKSLASH) { int sl; char *tmp; @@ -3626,7 +3630,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags, if (!copied) val = dupstring(val), copied = 1; if (quotemod > 0) { - if (quotetype > QT_BACKSLASH) { + if (quotetype == QT_QUOTEDZPUTS) { + val = quotedzputs(val, NULL); + } else if (quotetype > QT_BACKSLASH) { int sl; char *tmp; tmp = quotestring(val, NULL, quotetype); diff --git a/Src/utils.c b/Src/utils.c index fc2b1920e..1554fa0ae 100644 --- a/Src/utils.c +++ b/Src/utils.c @@ -387,6 +387,7 @@ putshout(int c) return 0; } +#ifdef MULTIBYTE_SUPPORT /* * Turn a character into a visible representation thereof. The visible * string is put together in a static buffer, and this function returns @@ -407,6 +408,73 @@ putshout(int c) * in a multibyte string. */ +/**/ +mod_export char * +nicechar_sel(int c, int quotable) +{ + static char buf[10]; + char *s = buf; + c &= 0xff; + if (ZISPRINT(c)) + goto done; + if (c & 0x80) { + if (isset(PRINTEIGHTBIT)) + goto done; + *s++ = '\\'; + *s++ = 'M'; + *s++ = '-'; + c &= 0x7f; + if(ZISPRINT(c)) + goto done; + } + if (c == 0x7f) { + if (quotable) { + *s++ = '\\'; + *s++ = 'C'; + *s++ = '-'; + } else + *s++ = '^'; + c = '?'; + } else if (c == '\n') { + *s++ = '\\'; + c = 'n'; + } else if (c == '\t') { + *s++ = '\\'; + c = 't'; + } else if (c < 0x20) { + if (quotable) { + *s++ = '\\'; + *s++ = 'C'; + *s++ = '-'; + } else + *s++ = '^'; + c += 0x40; + } + done: + /* + * The resulting string is still metafied, so check if + * we are returning a character in the range that needs metafication. + * This can't happen if the character is printed "nicely", so + * this results in a maximum of two bytes total (plus the null). + */ + if (imeta(c)) { + *s++ = Meta; + *s++ = c ^ 32; + } else + *s++ = c; + *s = 0; + return buf; +} + +/**/ +mod_export char * +nicechar(int c) +{ + return nicechar_sel(c, 0); +} + +#else /* MULTIBYTE_SUPPORT */ + /**/ mod_export char * nicechar(int c) @@ -459,6 +527,8 @@ nicechar(int c) return buf; } +#endif /* MULTIBYTE_SUPPORT */ + /* * Return 1 if nicechar() would reformat this character. */ @@ -527,7 +597,7 @@ mb_charinit(void) /**/ mod_export char * -wcs_nicechar(wchar_t c, size_t *widthp, char **swidep) +wcs_nicechar_sel(wchar_t c, size_t *widthp, char **swidep, int quotable) { static char *buf; static int bufalloc = 0, newalloc; @@ -552,9 +622,12 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep) s = buf; if (!iswprint(c) && (c < 0x80 || !isset(PRINTEIGHTBIT))) { if (c == 0x7f) { - *s++ = '\\'; - *s++ = 'C'; - *s++ = '-'; + if (quotable) { + *s++ = '\\'; + *s++ = 'C'; + *s++ = '-'; + } else + *s++ = '^'; c = '?'; } else if (c == L'\n') { *s++ = '\\'; @@ -563,9 +636,12 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep) *s++ = '\\'; c = 't'; } else if (c < 0x20) { - *s++ = '\\'; - *s++ = 'C'; - *s++ = '-'; + if (quotable) { + *s++ = '\\'; + *s++ = 'C'; + *s++ = '-'; + } else + *s++ = '^'; c += 0x40; } else if (c >= 0x80) { ret = -1; @@ -635,6 +711,13 @@ wcs_nicechar(wchar_t c, size_t *widthp, char **swidep) return buf; } +/**/ +mod_export char * +wcs_nicechar(wchar_t c, size_t *widthp, char **swidep) +{ + return wcs_nicechar_sel(c, widthp, swidep, 0); +} + /* * Return 1 if wcs_nicechar() would reformat this character for display. */ @@ -4918,7 +5001,7 @@ mb_niceformat(const char *s, FILE *stream, char **outstrp, int flags) /* FALL THROUGH */ case MB_INVALID: /* The byte didn't convert, so output it as a \M-... sequence. */ - fmt = nicechar(*ptr); + fmt = nicechar_sel(*ptr, flags & NICEFLAG_QUOTE); newl = strlen(fmt); cnt = 1; /* Get mbs out of its undefined state. */ @@ -4933,7 +5016,7 @@ mb_niceformat(const char *s, FILE *stream, char **outstrp, int flags) if (c == L'\'' && (flags & NICEFLAG_QUOTE)) fmt = "\\'"; else - fmt = wcs_nicechar(c, &newl, NULL); + fmt = wcs_nicechar_sel(c, &newl, NULL, flags & NICEFLAG_QUOTE); break; } @@ -4967,8 +5050,13 @@ mb_niceformat(const char *s, FILE *stream, char **outstrp, int flags) if (outstrp) { *outptr = '\0'; /* Use more efficient storage for returned string */ - *outstrp = (flags & NICEFLAG_HEAP) ? dupstring(outstr) : ztrdup(outstr); - free(outstr); + if (flags & NICEFLAG_NODUP) + *outstrp = outstr; + else { + *outstrp = (flags & NICEFLAG_HEAP) ? dupstring(outstr) : + ztrdup(outstr); + free(outstr); + } } return l; @@ -5834,38 +5922,76 @@ quotestring(const char *s, char **e, int instring) return v; } -/* Unmetafy and output a string, quoted if it contains special characters. */ +/* + * Unmetafy and output a string, quoted if it contains special + * characters. + * + * If stream is NULL, return the same output with any allocation on the + * heap. + */ /**/ -mod_export void +mod_export char * quotedzputs(char const *s, FILE *stream) { int inquote = 0, c; + char *outstr, *ptr; /* check for empty string */ if(!*s) { + if (!stream) + return "''"; fputs("''", stream); - return; + return NULL; } #ifdef MULTIBYTE_SUPPORT if (is_mb_niceformat(s)) { - fputs("$'", stream); - mb_niceformat(s, stream, NULL, NICEFLAG_QUOTE); - fputc('\'', stream); - return; + if (stream) { + fputs("$'", stream); + mb_niceformat(s, stream, NULL, NICEFLAG_QUOTE); + fputc('\'', stream); + return NULL; + } else { + char *substr; + mb_niceformat(s, NULL, &substr, NICEFLAG_QUOTE|NICEFLAG_NODUP); + outstr = (char *)zhalloc(4 + strlen(substr)); + sprintf(outstr, "$'%s'", substr); + free(substr); + return outstr; + } } #endif /* MULTIBYTE_SUPPORT */ if (!hasspecial(s)) { - zputs(s, stream); - return; + if (stream) { + zputs(s, stream); + return NULL; + } else { + return dupstring(s); + } } + if (!stream) { + const char *cptr; + int l = strlen(s) + 2; + for (cptr = s; *cptr; cptr++) { + if (*cptr == Meta) + cptr++; + else if (*cptr == '\'') + l += isset(RCQUOTES) ? 1 : 3; + } + ptr = outstr = zhalloc(l + 1); + } else { + ptr = outstr = NULL; + } if (isset(RCQUOTES)) { /* use rc-style quotes-within-quotes for the whole string */ - if(fputc('\'', stream) < 0) - return; + if (stream) { + if (fputc('\'', stream) < 0) + return NULL; + } else + *ptr++ = '\''; while(*s) { if (*s == Meta) c = *++s ^ 32; @@ -5873,52 +5999,98 @@ quotedzputs(char const *s, FILE *stream) c = *s; s++; if (c == '\'') { - if(fputc('\'', stream) < 0) - return; - } else if(c == '\n' && isset(CSHJUNKIEQUOTES)) { - if(fputc('\\', stream) < 0) - return; + if (stream) { + if (fputc('\'', stream) < 0) + return NULL; + } else + *ptr++ = '\''; + } else if (c == '\n' && isset(CSHJUNKIEQUOTES)) { + if (stream) { + if (fputc('\\', stream) < 0) + return NULL; + } else + *ptr++ = '\\'; + } + if (stream) { + if (fputc(c, stream) < 0) + return NULL; + } else { + if (imeta(c)) { + *ptr++ = Meta; + *ptr++ = c ^ 32; + } else + *ptr++ = c; } - if(fputc(c, stream) < 0) - return; } - if(fputc('\'', stream) < 0) - return; + if (stream) { + if (fputc('\'', stream) < 0) + return NULL; + } else + *ptr++ = '\''; } else { /* use Bourne-style quoting, avoiding empty quoted strings */ - while(*s) { + while (*s) { if (*s == Meta) c = *++s ^ 32; else c = *s; s++; if (c == '\'') { - if(inquote) { - if(fputc('\'', stream) < 0) - return; + if (inquote) { + if (stream) { + if (putc('\'', stream) < 0) + return NULL; + } else + *ptr++ = '\''; inquote=0; } - if(fputs("\\'", stream) < 0) - return; + if (stream) { + if (fputs("\\'", stream) < 0) + return NULL; + } else { + *ptr++ = '\\'; + *ptr++ = '\''; + } } else { if (!inquote) { - if(fputc('\'', stream) < 0) - return; + if (stream) { + if (fputc('\'', stream) < 0) + return NULL; + } else + *ptr++ = '\''; inquote=1; } - if(c == '\n' && isset(CSHJUNKIEQUOTES)) { - if(fputc('\\', stream) < 0) - return; + if (c == '\n' && isset(CSHJUNKIEQUOTES)) { + if (stream) { + if (fputc('\\', stream) < 0) + return NULL; + } else + *ptr++ = '\\'; + } + if (stream) { + if (fputc(c, stream) < 0) + return NULL; + } else { + if (imeta(c)) { + *ptr++ = Meta; + *ptr++ = c ^ 32; + } else + *ptr++ = c; } - if(fputc(c, stream) < 0) - return; } } if (inquote) { - if(fputc('\'', stream) < 0) - return; + if (stream) { + if (fputc('\'', stream) < 0) + return NULL; + } else + *ptr++ = '\''; } } + if (!stream) + *ptr++ = '\0'; + + return outstr; } /* Double-quote a metafied string. */ diff --git a/Src/zsh.h b/Src/zsh.h index caf7def06..0302d6886 100644 --- a/Src/zsh.h +++ b/Src/zsh.h @@ -272,7 +272,12 @@ enum { /* * As QT_BACKSLASH, but a NULL string is shown as ''. */ - QT_BACKSLASH_SHOWNULL + QT_BACKSLASH_SHOWNULL, + /* + * Quoting as produced by quotedzputs(), used for human + * readability of parameter values. + */ + QT_QUOTEDZPUTS }; #define QT_IS_SINGLE(x) ((x) == QT_SINGLE || (x) == QT_SINGLE_OPTIONAL) @@ -3055,6 +3060,7 @@ enum { enum { NICEFLAG_HEAP = 1, /* Heap allocation where needed */ NICEFLAG_QUOTE = 2, /* Result will appear in $'...' */ + NICEFLAG_NODUP = 4, /* Leave allocated */ }; /* Metafied input */ diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst index 2b46e069d..1460ff6b6 100644 --- a/Test/D04parameter.ztst +++ b/Test/D04parameter.ztst @@ -398,7 +398,7 @@ foo=$'\x7f\x00' print -r -- ${(V)foo} 0:${(V)...} ->\C-?\C-@ +>^?^@ foo='playing '\''stupid'\'' "games" \w\i\t\h $quoting.' print -r ${(q)foo} diff --git a/Test/V09datetime.ztst b/Test/V09datetime.ztst index 831421dc3..7905155d8 100644 --- a/Test/V09datetime.ztst +++ b/Test/V09datetime.ztst @@ -71,4 +71,4 @@ print -r -- ${(V)"$(strftime $'%Y\0%m\0%d' 100000000)"} 0:Embedded nulls ->1973\C-@03\C-@03 +>1973^@03^@03 -- cgit v1.2.3 From 7f5b2f570961cd9b20f322d7f45318878a1a4b29 Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Tue, 8 Dec 2015 15:08:24 +0000 Subject: 37348: Tests and fix for ${(q+)...}. Needs dupstring() for empty string case. --- ChangeLog | 4 ++++ Src/utils.c | 2 +- Test/D04parameter.ztst | 14 ++++++++++++++ Test/D07multibyte.ztst | 7 +++++++ 4 files changed, 26 insertions(+), 1 deletion(-) (limited to 'Src/utils.c') diff --git a/ChangeLog b/ChangeLog index 5b9558610..75f4eeb2f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ 2015-12-08 Peter Stephenson + * 37348: Src/utils.c, Test/D04parameter.ztst, + Test/D07multibyte.ztst: tests for ${(q+)...} and extra + dupstring() needed for empty string case. + * 37347: Functions/VCS_Info/VCS_INFO_nvcsformats: msgs shouldn't be local here, but in caller (where it already is). diff --git a/Src/utils.c b/Src/utils.c index 1554fa0ae..6d0f88d6d 100644 --- a/Src/utils.c +++ b/Src/utils.c @@ -5940,7 +5940,7 @@ quotedzputs(char const *s, FILE *stream) /* check for empty string */ if(!*s) { if (!stream) - return "''"; + return dupstring("''"); fputs("''", stream); return NULL; } diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst index 1460ff6b6..bcea980ad 100644 --- a/Test/D04parameter.ztst +++ b/Test/D04parameter.ztst @@ -1866,3 +1866,17 @@ >0 >1 /somewhere >2 /random /value + + print -r -- ${(q+):-} + print -r -- ${(q+)IFS} + print -r -- ${(q+):-oneword} + print -r -- ${(q+):-two words} + print -r -- ${(q+):-three so-called \'words\'} + (setopt rcquotes; print -r -- ${(q+):-three so-called \'words\'}) +0:${(q+)...} +>'' +>$' \t\n\C-@' +>oneword +>'two words' +>'three so-called '\''words'\' +>'three so-called ''words''' diff --git a/Test/D07multibyte.ztst b/Test/D07multibyte.ztst index f95c06d9a..39ba5ef8b 100644 --- a/Test/D07multibyte.ztst +++ b/Test/D07multibyte.ztst @@ -546,3 +546,10 @@ [[ $'\xe3\x83\x9b' != [[:INCOMPLETE:][:INVALID:]] ]] || print fail 3 [[ $'\xe3\x83\x9b' = ? ]] || print fail 4 0:Testing incomplete and invalid multibyte character components + + print -r -- ${(q+):-ホ} + foo='She said "ホ". I said "You can'\''t '\''ホ'\'' me!' + print -r -- ${(q+)foo} +0:${(q+)...} with printable multibyte characters +>ホ +>'She said "ホ". I said "You can'\''t '\''ホ'\'' me!' -- cgit v1.2.3 From 69379369daf5a159f7ac34a33778668d8d516848 Mon Sep 17 00:00:00 2001 From: Daniel Shahaf Date: Fri, 15 Jan 2016 06:30:12 +0000 Subject: unposted: Extend docstring of callhookfunc(). --- ChangeLog | 2 ++ Src/utils.c | 3 +++ 2 files changed, 5 insertions(+) (limited to 'Src/utils.c') diff --git a/ChangeLog b/ChangeLog index abc67383e..fbfc6c127 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,7 @@ 2016-01-15 Daniel Shahaf + * unposted: Src/utils.c: Extend docstring of callhookfunc(). + * unposted: Functions/VCS_Info/Backends/VCS_INFO_detect_p4: Fix typo in comment. diff --git a/Src/utils.c b/Src/utils.c index 6d0f88d6d..788eba97e 100644 --- a/Src/utils.c +++ b/Src/utils.c @@ -1450,6 +1450,9 @@ time_t lastwatch; * If "retval" is not NULL, the return value of the first hook function to * return non-zero is stored in *"retval". The return value is not otherwise * available as the calling context is restored. + * + * Returns 0 if at least one function was called (regardless of that function's + * exit status), and 1 otherwise. */ /**/ -- cgit v1.2.3 From 8eb9070d6785f423dd9bdbbb0513aa47c8a08d62 Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Tue, 19 Jan 2016 09:55:46 +0000 Subject: 37678: Now possible to quote "-" in pattern range --- ChangeLog | 4 ++++ Src/glob.c | 22 +++++++++++++++++++--- Src/lex.c | 30 ++++++++++++++++++++++++------ Src/pattern.c | 4 ++-- Src/utils.c | 2 +- Src/zsh.h | 18 ++++++++++++------ Test/D02glob.ztst | 40 ++++++++++++++++++++++++++++++++++++++++ 7 files changed, 102 insertions(+), 18 deletions(-) (limited to 'Src/utils.c') diff --git a/ChangeLog b/ChangeLog index d64e2c121..71acc1e64 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ 2016-01-18 Daniel Shahaf + * 37678: Src/glob.c, Src/lex.c, Src/pattern.c, Src/utils.c, + Src/zsh.h, Test/D02glob.ztst: Now possible to quote "-" in + a pattern range. + * 37634: Completion/Unix/Command/_man: _man: Support subsection names such as '3p'. diff --git a/Src/glob.c b/Src/glob.c index 8bd2fc493..e5d8956e6 100644 --- a/Src/glob.c +++ b/Src/glob.c @@ -3476,7 +3476,7 @@ static void zshtokenize(char *s, int flags) { char *t; - int bslash = 0; + int bslash = 0, seen_brct = 0; for (; *s; s++) { cont: @@ -3507,21 +3507,35 @@ zshtokenize(char *s, int flags) *t = Inang; *s = Outang; break; + case '[': + if (bslash) + s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull; + else { + seen_brct = 1; + *s = Inbrack; + } + break; + case '-': + if (bslash) + s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull; + else if (seen_brct) /* see corresonding code in lex.c */ + *s = Dash; + break; case '(': case '|': case ')': if (flags & ZSHTOK_SHGLOB) break; + /*FALLTHROUGH*/ case '>': case '^': case '#': case '~': - case '[': case ']': case '*': case '?': case '=': - for (t = ztokens; *t; t++) + for (t = ztokens; *t; t++) { if (*t == *s) { if (bslash) s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull; @@ -3529,6 +3543,8 @@ zshtokenize(char *s, int flags) *s = (t - ztokens) + Pound; break; } + } + break; } bslash = 0; } diff --git a/Src/lex.c b/Src/lex.c index 0f260d08f..9a7e3b8fe 100644 --- a/Src/lex.c +++ b/Src/lex.c @@ -35,7 +35,7 @@ /* tokens */ /**/ -mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,'\"\\\\"; +mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,-'\"\\\\"; /* parts of the current token */ @@ -394,8 +394,9 @@ ctxtlex(void) #define LX2_DQUOTE 15 #define LX2_BQUOTE 16 #define LX2_COMMA 17 -#define LX2_OTHER 18 -#define LX2_META 19 +#define LX2_DASH 18 +#define LX2_OTHER 19 +#define LX2_META 20 static unsigned char lexact1[256], lexact2[256], lextok2[256]; @@ -405,7 +406,7 @@ initlextabs(void) { int t0; static char *lx1 = "\\q\n;!&|(){}[]<>"; - static char *lx2 = ";)|$[]~({}><=\\\'\"`,"; + static char *lx2 = ";)|$[]~({}><=\\\'\"`,-"; for (t0 = 0; t0 != 256; t0++) { lexact1[t0] = LX1_OTHER; @@ -919,7 +920,7 @@ gettok(void) static enum lextok gettokstr(int c, int sub) { - int bct = 0, pct = 0, brct = 0, fdpar = 0; + int bct = 0, pct = 0, brct = 0, seen_brct = 0, fdpar = 0; int intpos = 1, in_brace_param = 0; int inquote, unmatched = 0; enum lextok peek; @@ -1033,8 +1034,10 @@ gettokstr(int c, int sub) } break; case LX2_INBRACK: - if (!in_brace_param) + if (!in_brace_param) { brct++; + seen_brct = 1; + } c = Inbrack; break; case LX2_OUTBRACK: @@ -1346,6 +1349,21 @@ gettokstr(int c, int sub) c = Tick; SETPAREND break; + case LX2_DASH: + /* + * - shouldn't be treated as a special character unless + * we're in a pattern. Howeve,simply counting "[" doesn't + * work as []a-z] is a valid expression and we don't know + * down here what this "[" is for as $foo[stuff] is valid + * in zsh. So just detect an opening [, which is enough + * to turn this into a pattern; the Dash will be harmlessly + * untokenised if not wanted. + */ + if (seen_brct) + c = Dash; + else + c = '-'; + break; } add(c); c = hgetc(); diff --git a/Src/pattern.c b/Src/pattern.c index 9e8a80ae1..d2b8c590b 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -1459,7 +1459,7 @@ patcomppiece(int *flagp, int paren) charstart = patparse; METACHARINC(patparse); - if (*patparse == '-' && patparse[1] && + if (*patparse == Dash && patparse[1] && patparse[1] != Outbrack) { patadd(NULL, STOUC(Meta)+PP_RANGE, 1, PA_NOALIGN); if (itok(*charstart)) { @@ -1468,7 +1468,7 @@ patcomppiece(int *flagp, int paren) } else { patadd(charstart, 0, patparse-charstart, PA_NOALIGN); } - charstart = ++patparse; /* skip ASCII '-' */ + charstart = ++patparse; /* skip Dash token */ METACHARINC(patparse); } if (itok(*charstart)) { diff --git a/Src/utils.c b/Src/utils.c index 788eba97e..fd0bab320 100644 --- a/Src/utils.c +++ b/Src/utils.c @@ -3888,7 +3888,7 @@ inittyptab(void) typtab['\0'] |= IMETA; typtab[STOUC(Meta) ] |= IMETA; typtab[STOUC(Marker)] |= IMETA; - for (t0 = (int)STOUC(Pound); t0 <= (int)STOUC(Comma); t0++) + for (t0 = (int)STOUC(Pound); t0 <= (int)STOUC(LAST_NORMAL_TOK); t0++) typtab[t0] |= ITOK | IMETA; for (t0 = (int)STOUC(Snull); t0 <= (int)STOUC(Nularg); t0++) typtab[t0] |= ITOK | IMETA | INULL; diff --git a/Src/zsh.h b/Src/zsh.h index 0302d6886..6ee2a9c8d 100644 --- a/Src/zsh.h +++ b/Src/zsh.h @@ -192,24 +192,30 @@ struct mathfunc { #define Tilde ((char) 0x98) #define Qtick ((char) 0x99) #define Comma ((char) 0x9a) +#define Dash ((char) 0x9b) /* Only in patterns */ +/* + * Marks the last of the group above. + * Remaining tokens are even more special. + */ +#define LAST_NORMAL_TOK Dash /* * Null arguments: placeholders for single and double quotes * and backslashes. */ -#define Snull ((char) 0x9b) -#define Dnull ((char) 0x9c) -#define Bnull ((char) 0x9d) +#define Snull ((char) 0x9c) +#define Dnull ((char) 0x9d) +#define Bnull ((char) 0x9e) /* * Backslash which will be returned to "\" instead of being stripped * when we turn the string into a printable format. */ -#define Bnullkeep ((char) 0x9e) +#define Bnullkeep ((char) 0x9f) /* * Null argument that does not correspond to any character. * This should be last as it does not appear in ztokens and * is used to initialise the IMETA type in inittyptab(). */ -#define Nularg ((char) 0x9f) +#define Nularg ((char) 0xa0) /* * Take care to update the use of IMETA appropriately when adding @@ -220,7 +226,7 @@ struct mathfunc { * Also used in pattern character arrays as guaranteed not to * mark a character in a string. */ -#define Marker ((char) 0xa0) +#define Marker ((char) 0xa1) /* chars that need to be quoted if meant literally */ diff --git a/Test/D02glob.ztst b/Test/D02glob.ztst index f944a4fbd..89256e303 100644 --- a/Test/D02glob.ztst +++ b/Test/D02glob.ztst @@ -582,3 +582,43 @@ >1 OK >2 OK >3 OK + + [[ foo = 'f'\o"o" ]] +0:Stripping of quotes from patterns (1) + + [[ foo = 'f'('o'|'a')('o'|'b') ]] +0:Stripping of quotes from patterns (2) + + [[ fob = 'f'('o'|'a')('o'|'b') ]] +0:Stripping of quotes from patterns (3) + + [[ fab = 'f'('o'|'a')('o'|'b') ]] +0:Stripping of quotes from patterns (4) + + [[ fib != 'f'('o'|'a')('o'|'b') ]] +0:Stripping of quotes from patterns (4) + + [[ - != [a-z] ]] +0:- is a special character in ranges + + [[ - = ['a-z'] ]] +0:- is not a special character in ranges if quoted + + [[ b-1 = [a-z]-[0-9] ]] +0:- untokenized following a bracketed subexpression + + [[ b-1 = []a-z]-[]0-9] ]] +0:- "]" after "[" is normal range character and - still works + + headremove="bcdef" + print ${headremove#[a-z]} +0:active - works in pattern in parameter +>cdef + + headremove="bcdef" + print ${headremove#['a-z']} + headremove="-cdef" + print ${headremove#['a-z']} +0:quoted - works in pattern in parameter +>bcdef +>cdef -- cgit v1.2.3