zstd.go 3.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package smallzstd produces zstd encoders and decoders optimized for
// low memory usage, at the expense of compression efficiency.
//
// This package is optimized primarily for the memory cost of
// compressing and decompressing data. We reduce this cost in two
// major ways: disable parallelism within the library (i.e. don't use
// multiple CPU cores to decompress), and drop the compression window
// down from the defaults of 4-16MiB, to 8kiB.
//
// Decompressors cost 2x the window size in RAM to run, so by using an
// 8kiB window, we can run ~1000 more decompressors per unit of memory
// than with the defaults.
//
// Depending on context, the benefit is either being able to run more
// decoders (e.g. in our logs processing system), or having a lower
// memory footprint when using compression in network protocols
// (e.g. in tailscaled, which should have a minimal RAM cost).
  21. package smallzstd
  22. import (
  23. "io"
  24. "github.com/klauspost/compress/zstd"
  25. )
// WindowSize is the window size used for zstd compression. Decoder
// memory usage scales linearly with WindowSize: a decompressor needs
// roughly 2x the window size in RAM, so the 8kiB value here keeps
// per-stream memory cost small (see the package comment for why).
const WindowSize = 8 << 10 // 8kiB
  29. // NewDecoder returns a zstd.Decoder configured for low memory usage,
  30. // at the expense of decompression performance.
  31. func NewDecoder(r io.Reader, options ...zstd.DOption) (*zstd.Decoder, error) {
  32. defaults := []zstd.DOption{
  33. // Default is GOMAXPROCS, which costs many KiB in stacks.
  34. zstd.WithDecoderConcurrency(1),
  35. // Default is to allocate more upfront for performance. We
  36. // prefer lower memory use and a bit of GC load.
  37. zstd.WithDecoderLowmem(true),
  38. // You might expect to see zstd.WithDecoderMaxMemory
  39. // here. However, it's not terribly safe to use if you're
  40. // doing stateless decoding, because it sets the maximum
  41. // amount of memory the decompressed data can occupy, rather
  42. // than the window size of the zstd stream. This means a very
  43. // compressible piece of data might violate the max memory
  44. // limit here, even if the window size (and thus total memory
  45. // required to decompress the data) is small.
  46. //
  47. // As a result, we don't set a decoder limit here, and rely on
  48. // the encoder below producing "cheap" streams. Callers are
  49. // welcome to set their own max memory setting, if
  50. // contextually there is a clearly correct value (e.g. it's
  51. // known from the upper layer protocol that the decoded data
  52. // can never be more than 1MiB).
  53. }
  54. return zstd.NewReader(r, append(defaults, options...)...)
  55. }
  56. // NewEncoder returns a zstd.Encoder configured for low memory usage,
  57. // both during compression and at decompression time, at the expense
  58. // of performance and compression efficiency.
  59. func NewEncoder(w io.Writer, options ...zstd.EOption) (*zstd.Encoder, error) {
  60. defaults := []zstd.EOption{
  61. // Default is GOMAXPROCS, which costs many KiB in stacks.
  62. zstd.WithEncoderConcurrency(1),
  63. // Default is several MiB, which bloats both encoders and
  64. // their corresponding decoders.
  65. zstd.WithWindowSize(WindowSize),
  66. // Encode zero-length inputs in a way that the `zstd` utility
  67. // can read, because interoperability is handy.
  68. zstd.WithZeroFrames(true),
  69. }
  70. return zstd.NewWriter(w, append(defaults, options...)...)
  71. }