浏览代码

Adding stripctrl.c which is needed for PuTTY abfc751c

Source commit: 3bba4fa1169f184e6167786a72cf97142d291070
Martin Prikryl 6 年之前
父节点
当前提交
41bb9fe2c0
共有 1 个文件被更改,包括 464 次插入0 次删除
  1. 464 0
      source/putty/stripctrl.c

+ 464 - 0
source/putty/stripctrl.c

@@ -0,0 +1,464 @@
+/*
+ * stripctrl.c: a facility for stripping control characters out of a
+ * data stream (defined as any multibyte character in the system
+ * locale which is neither printable nor \n), using the standard C
+ * library multibyte character facilities.
+ */
+
+#include <assert.h>
+#include <locale.h>
+#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include "putty.h"
+#include "terminal.h"
+#include "misc.h"
+#include "marshal.h"
+
+#define SCC_BUFSIZE 64
+#define LINE_LIMIT 77
+
+typedef struct StripCtrlCharsImpl StripCtrlCharsImpl;
+struct StripCtrlCharsImpl {
+    mbstate_t mbs_in, mbs_out;
+
+    bool permit_cr;
+    wchar_t substitution;
+
+    char buf[SCC_BUFSIZE];
+    size_t buflen;
+
+    Terminal *term;
+    bool last_term_utf;
+    struct term_utf8_decode utf8;
+    unsigned long (*translate)(Terminal *, term_utf8_decode *, unsigned char);
+
+    bool line_limit;
+    bool line_start;
+    size_t line_chars_remaining;
+
+    BinarySink *bs_out;
+
+    StripCtrlChars public;
+};
+
+static void stripctrl_locale_BinarySink_write(
+    BinarySink *bs, const void *vp, size_t len);
+static void stripctrl_term_BinarySink_write(
+    BinarySink *bs, const void *vp, size_t len);
+
+static StripCtrlCharsImpl *stripctrl_new_common(
+    BinarySink *bs_out, bool permit_cr, wchar_t substitution)
+{
+    StripCtrlCharsImpl *scc = snew(StripCtrlCharsImpl);
+    memset(scc, 0, sizeof(StripCtrlCharsImpl)); /* zeroes mbstates */
+    scc->bs_out = bs_out;
+    scc->permit_cr = permit_cr;
+    scc->substitution = substitution;
+    return scc;
+}
+
+StripCtrlChars *stripctrl_new(
+    BinarySink *bs_out, bool permit_cr, wchar_t substitution)
+{
+    StripCtrlCharsImpl *scc = stripctrl_new_common(
+        bs_out, permit_cr, substitution);
+    BinarySink_INIT(&scc->public, stripctrl_locale_BinarySink_write);
+    return &scc->public;
+}
+
+StripCtrlChars *stripctrl_new_term_fn(
+    BinarySink *bs_out, bool permit_cr, wchar_t substitution,
+    Terminal *term, unsigned long (*translate)(
+        Terminal *, term_utf8_decode *, unsigned char))
+{
+    StripCtrlCharsImpl *scc = stripctrl_new_common(
+        bs_out, permit_cr, substitution);
+    scc->term = term;
+    scc->translate = translate;
+    BinarySink_INIT(&scc->public, stripctrl_term_BinarySink_write);
+    return &scc->public;
+}
+
+void stripctrl_retarget(StripCtrlChars *sccpub, BinarySink *new_bs_out)
+{
+    StripCtrlCharsImpl *scc =
+        container_of(sccpub, StripCtrlCharsImpl, public);
+    scc->bs_out = new_bs_out;
+    stripctrl_reset(sccpub);
+}
+
+void stripctrl_reset(StripCtrlChars *sccpub)
+{
+    StripCtrlCharsImpl *scc =
+        container_of(sccpub, StripCtrlCharsImpl, public);
+
+    /*
+     * Clear all the fields that might have been in the middle of a
+     * multibyte character or non-default shift state, so that we can
+     * start converting a fresh piece of data to send to a channel
+     * that hasn't seen the previous output.
+     */
+    memset(&scc->utf8, 0, sizeof(scc->utf8));
+    memset(&scc->mbs_in, 0, sizeof(scc->mbs_in));
+    memset(&scc->mbs_out, 0, sizeof(scc->mbs_out));
+
+    /*
+     * Also, reset the line-limiting system to its starting state.
+     */
+    scc->line_start = true;
+}
+
+void stripctrl_free(StripCtrlChars *sccpub)
+{
+    StripCtrlCharsImpl *scc =
+        container_of(sccpub, StripCtrlCharsImpl, public);
+    smemclr(scc, sizeof(StripCtrlCharsImpl));
+    sfree(scc);
+}
+
+void stripctrl_enable_line_limiting(StripCtrlChars *sccpub)
+{
+    StripCtrlCharsImpl *scc =
+        container_of(sccpub, StripCtrlCharsImpl, public);
+    scc->line_limit = true;
+    scc->line_start = true;
+}
+
+static inline bool stripctrl_ctrlchar_ok(StripCtrlCharsImpl *scc, wchar_t wc)
+{
+    return wc == L'\n' || (wc == L'\r' && scc->permit_cr);
+}
+
+static inline void stripctrl_check_line_limit(
+    StripCtrlCharsImpl *scc, wchar_t wc, size_t width)
+{
+    if (!scc->line_limit)
+        return;                        /* nothing to do */
+
+    if (scc->line_start) {
+        put_datapl(scc->bs_out, PTRLEN_LITERAL("| "));
+        scc->line_start = false;
+        scc->line_chars_remaining = LINE_LIMIT;
+    }
+
+    if (wc == '\n') {
+        scc->line_start = true;
+        return;
+    }
+
+    if (scc->line_chars_remaining < width) {
+        put_datapl(scc->bs_out, PTRLEN_LITERAL("\r\n> "));
+        scc->line_chars_remaining = LINE_LIMIT;
+    }
+
+    assert(width <= scc->line_chars_remaining);
+    scc->line_chars_remaining -= width;
+}
+
+static inline void stripctrl_locale_put_wc(StripCtrlCharsImpl *scc, wchar_t wc)
+{
+    if (iswprint(wc) || stripctrl_ctrlchar_ok(scc, wc)) {
+        /* Printable character, or one we're going to let through anyway. */
+    } else if (scc->substitution) {
+        wc = scc->substitution;
+    } else {
+        /* No defined substitution, so don't write any output wchar_t. */
+        return;
+    }
+
+    stripctrl_check_line_limit(scc, wc, mk_wcwidth(wc));
+
+    char outbuf[MB_LEN_MAX];
+    size_t produced = wcrtomb(outbuf, wc, &scc->mbs_out);
+    if (produced > 0)
+        put_data(scc->bs_out, outbuf, produced);
+}
+
+static inline void stripctrl_term_put_wc(
+    StripCtrlCharsImpl *scc, unsigned long wc)
+{
+    ptrlen prefix = PTRLEN_LITERAL("");
+
+    if (!(wc & ~0x9F)) {
+        /* This is something the terminal interprets as a control
+         * character. */
+        if (!stripctrl_ctrlchar_ok(scc, wc)) {
+            if (!scc->substitution)
+                return;
+            else
+                wc = scc->substitution;
+        }
+
+        if (wc == '\012') {
+            /* Precede \n with \r, because our terminal will not
+             * generally be in the ONLCR mode where it assumes that
+             * internally, and any \r on input has been stripped
+             * out. */
+            prefix = PTRLEN_LITERAL("\r");
+        }
+    }
+
+    stripctrl_check_line_limit(scc, wc, term_char_width(scc->term, wc));
+
+    if (prefix.len)
+        put_datapl(scc->bs_out, prefix);
+
+    char outbuf[6];
+    size_t produced;
+
+    /*
+     * The Terminal implementation encodes 7-bit ASCII characters in
+     * UTF-8 mode, and all printing characters in non-UTF-8 (i.e.
+     * single-byte character set) mode, as values in the surrogate
+     * range (a conveniently unused piece of space in this context)
+     * whose low byte is the original 1-byte representation of the
+     * character.
+     */
+    if ((wc - 0xD800) < (0xE000 - 0xD800))
+        wc &= 0xFF;
+
+    if (in_utf(scc->term)) {
+        produced = encode_utf8(outbuf, wc);
+    } else {
+        outbuf[0] = wc;
+        produced = 1;
+    }
+
+    if (produced > 0)
+        put_data(scc->bs_out, outbuf, produced);
+}
+
+static inline size_t stripctrl_locale_try_consume(
+    StripCtrlCharsImpl *scc, const char *p, size_t len)
+{
+    wchar_t wc;
+    mbstate_t mbs_orig = scc->mbs_in;
+    size_t consumed = mbrtowc(&wc, p, len, &scc->mbs_in);
+
+    if (consumed == (size_t)-2) {
+        /*
+         * The buffer is too short to see the end of the multibyte
+         * character that it appears to be starting with. We return 0
+         * for 'no data consumed', restore the conversion state from
+         * before consuming the partial character, and our caller will
+         * come back when it has more data available.
+         */
+        scc->mbs_in = mbs_orig;
+        return 0;
+    }
+
+    if (consumed == (size_t)-1) {
+        /*
+         * The buffer contains an illegal multibyte sequence. There's
+         * no really good way to recover from this, so we'll just
+         * reset our input state, consume a single byte without
+         * emitting anything, and hope we can resynchronise to
+         * _something_ sooner or later.
+         */
+        memset(&scc->mbs_in, 0, sizeof(scc->mbs_in));
+        return 1;
+    }
+
+    if (consumed == 0) {
+        /*
+         * A zero wide character is encoded by the data, but mbrtowc
+         * hasn't told us how many input bytes it takes. There isn't
+         * really anything good we can do here, so we just advance by
+         * one byte in the hope that that was the NUL.
+         *
+         * (If it wasn't - that is, if we're in a multibyte encoding
+         * in which the terminator of a normal C string is encoded in
+         * some way other than a single zero byte - then probably lots
+         * of other things will have gone wrong before we get here!)
+         */
+        stripctrl_locale_put_wc(scc, L'\0');
+        return 1;
+    }
+
+    /*
+     * Otherwise, this is the easy case: consumed > 0, and we've eaten
+     * a valid multibyte character.
+     */
+    stripctrl_locale_put_wc(scc, wc);
+    return consumed;
+}
+
+static void stripctrl_locale_BinarySink_write(
+    BinarySink *bs, const void *vp, size_t len)
+{
+    StripCtrlChars *sccpub = BinarySink_DOWNCAST(bs, StripCtrlChars);
+    StripCtrlCharsImpl *scc =
+        container_of(sccpub, StripCtrlCharsImpl, public);
+    const char *p = (const char *)vp;
+
+    const char *previous_locale = setlocale(LC_CTYPE, NULL);
+    setlocale(LC_CTYPE, "");
+
+    /*
+     * Deal with any partial multibyte character buffered from last
+     * time.
+     */
+    while (scc->buflen > 0) {
+        size_t to_copy = SCC_BUFSIZE - scc->buflen;
+        if (to_copy > len)
+            to_copy = len;
+
+        memcpy(scc->buf + scc->buflen, p, to_copy);
+        size_t consumed = stripctrl_locale_try_consume(
+            scc, scc->buf, scc->buflen + to_copy);
+
+        if (consumed >= scc->buflen) {
+            /*
+             * We've consumed a multibyte character that includes all
+             * the data buffered from last time. So we can clear our
+             * buffer and move on to processing the main input string
+             * in situ, having first discarded whatever initial
+             * segment of it completed our previous character.
+             */
+            size_t consumed_from_main_string = consumed - scc->buflen;
+            assert(consumed_from_main_string <= len);
+            p += consumed_from_main_string;
+            len -= consumed_from_main_string;
+            scc->buflen = 0;
+            break;
+        }
+
+        if (consumed == 0) {
+            /*
+             * If we didn't manage to consume anything, i.e. the whole
+             * buffer contains an incomplete sequence, it had better
+             * be because our entire input string _this_ time plus
+             * whatever leftover data we had from _last_ time still
+             * comes to less than SCC_BUFSIZE. In other words, we've
+             * already copied all the new data on to the end of our
+             * buffer, and it still hasn't helped. So increment buflen
+             * to reflect the new data, and return.
+             */
+            assert(to_copy == len);
+            scc->buflen += to_copy;
+            goto out;
+        }
+
+        /*
+         * Otherwise, we've somehow consumed _less_ data than we had
+         * buffered, and yet we weren't able to consume that data in
+         * the last call to this function. That sounds impossible, but
+         * I can think of one situation in which it could happen: if
+         * we had an incomplete MB sequence last time, and now more
+         * data has arrived, it turns out to be an _illegal_ one, so
+         * we consume one byte in the hope of resynchronising.
+         *
+         * Anyway, in this case we move the buffer up and go back
+         * round this initial loop.
+         */
+        scc->buflen -= consumed;
+        memmove(scc->buf, scc->buf + consumed, scc->buflen);
+    }
+
+    /*
+     * Now charge along the main string.
+     */
+    while (len > 0) {
+        size_t consumed = stripctrl_locale_try_consume(scc, p, len);
+        if (consumed == 0)
+            break;
+        assert(consumed <= len);
+        p += consumed;
+        len -= consumed;
+    }
+
+    /*
+     * Any data remaining should be copied into our buffer, to keep
+     * for next time.
+     */
+    assert(len <= SCC_BUFSIZE);
+    memcpy(scc->buf, p, len);
+    scc->buflen = len;
+
+  out:
+    setlocale(LC_CTYPE, previous_locale);
+}
+
+static void stripctrl_term_BinarySink_write(
+    BinarySink *bs, const void *vp, size_t len)
+{
+    StripCtrlChars *sccpub = BinarySink_DOWNCAST(bs, StripCtrlChars);
+    StripCtrlCharsImpl *scc =
+        container_of(sccpub, StripCtrlCharsImpl, public);
+
+    bool utf = in_utf(scc->term);
+    if (utf != scc->last_term_utf) {
+        scc->last_term_utf = utf;
+        scc->utf8.state = 0;
+    }
+
+    for (const unsigned char *p = (const unsigned char *)vp;
+         len > 0; len--, p++) {
+        unsigned long t = scc->translate(scc->term, &scc->utf8, *p);
+        if (t == UCSTRUNCATED) {
+            stripctrl_term_put_wc(scc, 0xFFFD);
+            /* go round again */
+            t = scc->translate(scc->term, &scc->utf8, *p);
+        }
+        if (t == UCSINCOMPLETE)
+            continue;
+        if (t == UCSINVALID)
+            t = 0xFFFD;
+
+        stripctrl_term_put_wc(scc, t);
+    }
+}
+
+char *stripctrl_string_ptrlen(StripCtrlChars *sccpub, ptrlen str)
+{
+    strbuf *out = strbuf_new();
+    stripctrl_retarget(sccpub, BinarySink_UPCAST(out));
+    put_datapl(sccpub, str);
+    stripctrl_retarget(sccpub, NULL);
+    return strbuf_to_str(out);
+}
+
+#ifdef STRIPCTRL_TEST
+
+/*
+gcc -std=c99 -DSTRIPCTRL_TEST -o scctest stripctrl.c marshal.c utils.c memory.c wcwidth.c -I . -I unix -I charset
+*/
+
+void out_of_memory(void) { fprintf(stderr, "out of memory\n"); abort(); }
+
+void stripctrl_write(BinarySink *bs, const void *vdata, size_t len)
+{
+    const uint8_t *p = vdata;
+    printf("[");
+    for (size_t i = 0; i < len; i++)
+        printf("%*s%02x", i?1:0, "", (unsigned)p[i]);
+    printf("]");
+}
+
+void stripctrl_test(StripCtrlChars *scc, ptrlen pl)
+{
+    stripctrl_write(NULL, pl.ptr, pl.len);
+    printf(" -> ");
+    put_datapl(scc, pl);
+    printf("\n");
+}
+
+int main(void)
+{
+    struct foo { BinarySink_IMPLEMENTATION; } foo;
+    BinarySink_INIT(&foo, stripctrl_write);
+    StripCtrlChars *scc = stripctrl_new(BinarySink_UPCAST(&foo), false, '?');
+    stripctrl_test(scc, PTRLEN_LITERAL("a\033[1mb"));
+    stripctrl_test(scc, PTRLEN_LITERAL("a\xC2\x9B[1mb"));
+    stripctrl_test(scc, PTRLEN_LITERAL("a\xC2\xC2[1mb"));
+    stripctrl_test(scc, PTRLEN_LITERAL("\xC3"));
+    stripctrl_test(scc, PTRLEN_LITERAL("\xA9"));
+    stripctrl_test(scc, PTRLEN_LITERAL("\xE2\x80\x8F"));
+    stripctrl_test(scc, PTRLEN_LITERAL("a\0b"));
+    stripctrl_free(scc);
+    return 0;
+}
+
+#endif /* STRIPCTRL_TEST */