zstd.go 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. // Copyright (c) Tailscale Inc & AUTHORS
  2. // SPDX-License-Identifier: BSD-3-Clause
  3. // Package zstdframe provides functionality for encoding and decoding
  4. // independently compressed zstandard frames.
  5. package zstdframe
  6. import (
  7. "encoding/binary"
  8. "io"
  9. "github.com/klauspost/compress/zstd"
  10. )
  11. // The Go zstd API surface is not ergonomic:
  12. //
  13. // - Options are set via NewReader and NewWriter and immutable once set.
  14. //
  15. // - Stateless operations like EncodeAll and DecodeAll are methods on
  16. // the Encoder and Decoder types, which implies that options cannot be
  17. // changed without allocating an entirely new Encoder or Decoder.
  18. //
  19. // This is further strange as Encoder and Decoder types are either
  20. // stateful or stateless objects depending on semantic context.
  21. //
  22. // - By default, the zstd package tries to be overly clever by spawning off
  23. // multiple goroutines to do work, which can lead to both excessive fanout
  24. // of resources and also subtle race conditions. Also, each Encoder/Decoder
  25. // never relinquishes resources, which makes it unsuitable for low-memory use.
  26. // We work around the zstd defaults by setting concurrency=1 on each coder
  27. // and pool individual coders, allowing the Go GC to reclaim unused coders.
  28. //
  29. // See https://github.com/klauspost/compress/issues/264
  30. // See https://github.com/klauspost/compress/issues/479
  31. //
  32. // - The EncodeAll and DecodeAll functions append to a user-provided buffer,
  33. // but use a signature opposite of most append-like functions in Go,
  34. // where the output buffer is the second argument, leading to footguns.
  35. // The zstdframe package provides AppendEncode and AppendDecode functions
  36. // that follow the Go convention of the first argument being the output buffer,
  37. // similar to how the builtin append function operates.
  38. //
  39. // See https://github.com/klauspost/compress/issues/648
  40. //
  41. // - The zstd package is oddly inconsistent about naming. For example,
  42. // IgnoreChecksum vs WithEncoderCRC, or
  43. // WithDecoderLowmem vs WithLowerEncoderMem.
  44. // Most options have a WithDecoder or WithEncoder prefix, but some do not.
  45. //
  46. // The zstdframe package wraps the zstd package and presents a more ergonomic API
  47. // by providing stateless functions that take in variadic options.
  48. // Pooling of resources is handled by this package to avoid each caller
  49. // redundantly performing the same pooling at different call sites.
  50. // TODO: Since compression is CPU bound,
  51. // should we have a semaphore ensure at most one operation per CPU?
  52. // AppendEncode appends the zstandard encoded content of src to dst.
  53. // It emits exactly one frame as a single segment.
  54. func AppendEncode(dst, src []byte, opts ...Option) []byte {
  55. enc := getEncoder(opts...)
  56. defer putEncoder(enc)
  57. return enc.EncodeAll(src, dst)
  58. }
  59. // AppendDecode appends the zstandard decoded content of src to dst.
  60. // The input may consist of zero or more frames.
  61. // Any call that handles untrusted input should specify [MaxDecodedSize].
  62. func AppendDecode(dst, src []byte, opts ...Option) ([]byte, error) {
  63. dec := getDecoder(opts...)
  64. defer putDecoder(dec)
  65. return dec.DecodeAll(src, dst)
  66. }
  67. // NextSize parses the next frame (regardless of whether it is a
  68. // data frame or a metadata frame) and returns the total size of the frame.
  69. // The frame can be skipped by slicing n bytes from b (e.g., b[n:]).
  70. // It report [io.ErrUnexpectedEOF] if the frame is incomplete.
  71. func NextSize(b []byte) (n int, err error) {
  72. // Parse the frame header (RFC 8878, section 3.1.1.).
  73. var frame zstd.Header
  74. if err := frame.Decode(b); err != nil {
  75. return n, err
  76. }
  77. n += frame.HeaderSize
  78. if frame.Skippable {
  79. // Handle skippable frame (RFC 8878, section 3.1.2.).
  80. if len(b[n:]) < int(frame.SkippableSize) {
  81. return n, io.ErrUnexpectedEOF
  82. }
  83. n += int(frame.SkippableSize)
  84. } else {
  85. // Handle one or more Data_Blocks (RFC 8878, section 3.1.1.2.).
  86. for {
  87. if len(b[n:]) < 3 {
  88. return n, io.ErrUnexpectedEOF
  89. }
  90. blockHeader := binary.LittleEndian.Uint32(b[n-1:]) >> 8 // load uint24
  91. lastBlock := (blockHeader >> 0) & ((1 << 1) - 1)
  92. blockType := (blockHeader >> 1) & ((1 << 2) - 1)
  93. blockSize := (blockHeader >> 3) & ((1 << 21) - 1)
  94. n += 3
  95. if blockType == 1 {
  96. // For RLE_Block (RFC 8878, section 3.1.1.2.2.),
  97. // the Block_Content is only a single byte.
  98. blockSize = 1
  99. }
  100. if len(b[n:]) < int(blockSize) {
  101. return n, io.ErrUnexpectedEOF
  102. }
  103. n += int(blockSize)
  104. if lastBlock != 0 {
  105. break
  106. }
  107. }
  108. // Handle optional Content_Checksum (RFC 8878, section 3.1.1.).
  109. if frame.HasCheckSum {
  110. if len(b[n:]) < 4 {
  111. return n, io.ErrUnexpectedEOF
  112. }
  113. n += 4
  114. }
  115. }
  116. return n, nil
  117. }