Browse Source

util: add truncate package (#7490)

This package handles cases where we need to truncate human-readable text to fit
a length constraint without leaving "ragged" multi-byte rune fragments at the
end of the truncated value.

Change-Id: Id972135d1880485f41b1fedfb65c2b8cc012d416
Signed-off-by: M. J. Fromberger <[email protected]>
M. J. Fromberger 3 years ago
parent
commit
a75360ccd6
2 changed files with 67 additions and 0 deletions
  1. 31 0
      util/truncate/truncate.go
  2. 36 0
      util/truncate/truncate_test.go

+ 31 - 0
util/truncate/truncate.go

@@ -0,0 +1,31 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+// Package truncate provides a utility function for safely truncating UTF-8
+// strings to a fixed length, respecting multi-byte codepoints.
+package truncate
+
+// String returns a prefix of the UTF-8 string s that is at most n bytes long
+// and never ends in a partial UTF-8 encoding. If len(s) ≤ n, s is returned
+// unmodified; otherwise the cut may fall before n, and a multi-byte rune that
+// ends exactly at n is dropped too. A negative n panics when s is sliced.
+func String(s string, n int) string {
+	if n >= len(s) {
+		return s
+	}
+
+	// Back up over any continuation bytes at the end of the n-byte prefix.
+	for n > 0 && s[n-1]&0xc0 == 0x80 { // 0b10xxxxxx is a continuation byte
+		n--
+	}
+
+	// If the prefix now ends in the lead byte of a multi-byte encoding, back
+	// up one more to drop it. The encoding may have been complete within the
+	// original n bytes, but checking in only one direction keeps this simple.
+	//
+	// Otherwise, the last byte is a single-byte code (0b00... or 0b01...).
+	if n > 0 && s[n-1]&0xc0 == 0xc0 { // 0b11xxxxxx starts a multi-byte encoding
+		n--
+	}
+	return s[:n]
+}

+ 36 - 0
util/truncate/truncate_test.go

@@ -0,0 +1,36 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+package truncate_test
+
+import (
+	"testing"
+
+	"tailscale.com/util/truncate"
+)
+
+func TestString(t *testing.T) {
+	tests := []struct { // each case: input text, byte limit n, expected prefix
+		input string
+		size  int
+		want  string
+	}{
+		{"", 1000, ""},                 // n > length, empty input
+		{"abc", 4, "abc"},              // n > length, returned unmodified
+		{"abc", 3, "abc"},              // n == length, returned unmodified
+		{"abcdefg", 4, "abcd"},         // n < length, single-byte codes only
+		{"abcdefg", 0, ""},             // n == 0, empty result
+		{"abc\U0001fc2d", 3, "abc"},    // n < length, just before the 4-byte rune
+		{"abc\U0001fc2d", 4, "abc"},    // n < length, 1 byte into the rune
+		{"abc\U0001fc2d", 5, "abc"},    // n < length, 2 bytes into the rune
+		{"abc\U0001fc2d", 6, "abc"},    // n < length, 3 bytes into the rune
+		{"abc\U0001fc2defg", 7, "abc"}, // n at the end of the rune; still dropped
+	}
+
+	for _, tc := range tests {
+		got := truncate.String(tc.input, tc.size)
+		if got != tc.want {
+			t.Errorf("truncate(%q, %d): got %q, want %q", tc.input, tc.size, got, tc.want)
+		}
+	}
+}