writer.go 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. /*
  2. * Copyright 2011-2012 Branimir Karadzic. All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without modification,
  5. * are permitted provided that the following conditions are met:
  6. *
  7. * 1. Redistributions of source code must retain the above copyright notice, this
  8. * list of conditions and the following disclaimer.
  9. *
  10. * 2. Redistributions in binary form must reproduce the above copyright notice,
  11. * this list of conditions and the following disclaimer in the documentation
  12. * and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR
  15. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  16. * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
  17. * SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  18. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  19. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  20. * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  21. * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  22. * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  23. * THE POSSIBILITY OF SUCH DAMAGE.
  24. */
  25. package lz4
  26. import (
  27. "encoding/binary"
  28. "errors"
  29. )
  30. const (
  31. minMatch = 4
  32. hashLog = 17
  33. hashTableSize = 1 << hashLog
  34. hashShift = (minMatch * 8) - hashLog
  35. incompressible uint32 = 128
  36. uninitHash = 0x88888888
  37. // MaxInputSize is the largest buffer than can be compressed in a single block
  38. MaxInputSize = 0x7E000000
  39. )
  40. var (
  41. // ErrTooLarge indicates the input buffer was too large
  42. ErrTooLarge = errors.New("input too large")
  43. )
  44. type encoder struct {
  45. src []byte
  46. dst []byte
  47. hashTable []uint32
  48. pos uint32
  49. anchor uint32
  50. dpos uint32
  51. }
  52. // CompressBound returns the maximum length of a lz4 block, given it's uncompressed length
  53. func CompressBound(isize int) int {
  54. if isize > MaxInputSize {
  55. return 0
  56. }
  57. return isize + ((isize) / 255) + 16 + 4
  58. }
  59. func (e *encoder) writeLiterals(length, mlLen, pos uint32) {
  60. ln := length
  61. var code byte
  62. if ln > runMask-1 {
  63. code = runMask
  64. } else {
  65. code = byte(ln)
  66. }
  67. if mlLen > mlMask-1 {
  68. e.dst[e.dpos] = (code << mlBits) + byte(mlMask)
  69. } else {
  70. e.dst[e.dpos] = (code << mlBits) + byte(mlLen)
  71. }
  72. e.dpos++
  73. if code == runMask {
  74. ln -= runMask
  75. for ; ln > 254; ln -= 255 {
  76. e.dst[e.dpos] = 255
  77. e.dpos++
  78. }
  79. e.dst[e.dpos] = byte(ln)
  80. e.dpos++
  81. }
  82. for ii := uint32(0); ii < length; ii++ {
  83. e.dst[e.dpos+ii] = e.src[pos+ii]
  84. }
  85. e.dpos += length
  86. }
  87. // Encode returns the encoded form of src. The returned array may be a
  88. // sub-slice of dst if it was large enough to hold the entire output.
  89. func Encode(dst, src []byte) ([]byte, error) {
  90. if len(src) >= MaxInputSize {
  91. return nil, ErrTooLarge
  92. }
  93. if n := CompressBound(len(src)); len(dst) < n {
  94. dst = make([]byte, n)
  95. }
  96. e := encoder{src: src, dst: dst, hashTable: make([]uint32, hashTableSize)}
  97. binary.LittleEndian.PutUint32(dst, uint32(len(src)))
  98. e.dpos = 4
  99. var (
  100. step uint32 = 1
  101. limit = incompressible
  102. )
  103. for {
  104. if int(e.pos)+12 >= len(e.src) {
  105. e.writeLiterals(uint32(len(e.src))-e.anchor, 0, e.anchor)
  106. return e.dst[:e.dpos], nil
  107. }
  108. sequence := uint32(e.src[e.pos+3])<<24 | uint32(e.src[e.pos+2])<<16 | uint32(e.src[e.pos+1])<<8 | uint32(e.src[e.pos+0])
  109. hash := (sequence * 2654435761) >> hashShift
  110. ref := e.hashTable[hash] + uninitHash
  111. e.hashTable[hash] = e.pos - uninitHash
  112. if ((e.pos-ref)>>16) != 0 || uint32(e.src[ref+3])<<24|uint32(e.src[ref+2])<<16|uint32(e.src[ref+1])<<8|uint32(e.src[ref+0]) != sequence {
  113. if e.pos-e.anchor > limit {
  114. limit <<= 1
  115. step += 1 + (step >> 2)
  116. }
  117. e.pos += step
  118. continue
  119. }
  120. if step > 1 {
  121. e.hashTable[hash] = ref - uninitHash
  122. e.pos -= step - 1
  123. step = 1
  124. continue
  125. }
  126. limit = incompressible
  127. ln := e.pos - e.anchor
  128. back := e.pos - ref
  129. anchor := e.anchor
  130. e.pos += minMatch
  131. ref += minMatch
  132. e.anchor = e.pos
  133. for int(e.pos) < len(e.src)-5 && e.src[e.pos] == e.src[ref] {
  134. e.pos++
  135. ref++
  136. }
  137. mlLen := e.pos - e.anchor
  138. e.writeLiterals(ln, mlLen, anchor)
  139. e.dst[e.dpos] = uint8(back)
  140. e.dst[e.dpos+1] = uint8(back >> 8)
  141. e.dpos += 2
  142. if mlLen > mlMask-1 {
  143. mlLen -= mlMask
  144. for mlLen > 254 {
  145. mlLen -= 255
  146. e.dst[e.dpos] = 255
  147. e.dpos++
  148. }
  149. e.dst[e.dpos] = byte(mlLen)
  150. e.dpos++
  151. }
  152. e.anchor = e.pos
  153. }
  154. }