|
|
@@ -1,188 +1,188 @@
|
|
|
-/*
|
|
|
- * Copyright 2011-2012 Branimir Karadzic. All rights reserved.
|
|
|
- *
|
|
|
- * Redistribution and use in source and binary forms, with or without modification,
|
|
|
- * are permitted provided that the following conditions are met:
|
|
|
- *
|
|
|
- * 1. Redistributions of source code must retain the above copyright notice, this
|
|
|
- * list of conditions and the following disclaimer.
|
|
|
- *
|
|
|
- * 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
|
- * this list of conditions and the following disclaimer in the documentation
|
|
|
- * and/or other materials provided with the distribution.
|
|
|
- *
|
|
|
- * THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR
|
|
|
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
|
|
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
|
|
- * SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
|
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
|
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
|
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
|
|
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
|
|
- * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
|
|
- * THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
- */
|
|
|
-
|
|
|
-package lz4
|
|
|
-
|
|
|
-import "encoding/binary"
|
|
|
-import "errors"
|
|
|
-
|
|
|
-const (
|
|
|
- minMatch = 4
|
|
|
- hashLog = 17
|
|
|
- hashTableSize = 1 << hashLog
|
|
|
- hashShift = (minMatch * 8) - hashLog
|
|
|
- incompressible uint32 = 128
|
|
|
- uninitHash = 0x88888888
|
|
|
-
|
|
|
- // MaxInputSize is the largest buffer than can be compressed in a single block
|
|
|
- MaxInputSize = 0x7E000000
|
|
|
-)
|
|
|
-
|
|
|
-var (
|
|
|
- // ErrTooLarge indicates the input buffer was too large
|
|
|
- ErrTooLarge = errors.New("input too large")
|
|
|
-)
|
|
|
-
|
|
|
-type encoder struct {
|
|
|
- src []byte
|
|
|
- dst []byte
|
|
|
- hashTable []uint32
|
|
|
- pos uint32
|
|
|
- anchor uint32
|
|
|
- dpos uint32
|
|
|
-}
|
|
|
-
|
|
|
-// CompressBound returns the maximum length of a lz4 block, given it's uncompressed length
|
|
|
-func CompressBound(isize int) int {
|
|
|
- if isize > MaxInputSize {
|
|
|
- return 0
|
|
|
- }
|
|
|
- return isize + ((isize) / 255) + 16 + 4
|
|
|
-}
|
|
|
-
|
|
|
-func (e *encoder) writeLiterals(length, mlLen, pos uint32) {
|
|
|
-
|
|
|
- ln := length
|
|
|
-
|
|
|
- var code byte
|
|
|
- if ln > runMask-1 {
|
|
|
- code = runMask
|
|
|
- } else {
|
|
|
- code = byte(ln)
|
|
|
- }
|
|
|
-
|
|
|
- if mlLen > mlMask-1 {
|
|
|
- e.dst[e.dpos] = (code << mlBits) + byte(mlMask)
|
|
|
- } else {
|
|
|
- e.dst[e.dpos] = (code << mlBits) + byte(mlLen)
|
|
|
- }
|
|
|
- e.dpos++
|
|
|
-
|
|
|
- if code == runMask {
|
|
|
- ln -= runMask
|
|
|
- for ; ln > 254; ln -= 255 {
|
|
|
- e.dst[e.dpos] = 255
|
|
|
- e.dpos++
|
|
|
- }
|
|
|
-
|
|
|
- e.dst[e.dpos] = byte(ln)
|
|
|
- e.dpos++
|
|
|
- }
|
|
|
-
|
|
|
- for ii := uint32(0); ii < length; ii++ {
|
|
|
- e.dst[e.dpos+ii] = e.src[pos+ii]
|
|
|
- }
|
|
|
-
|
|
|
- e.dpos += length
|
|
|
-}
|
|
|
-
|
|
|
-// Encode returns the encoded form of src. The returned array may be a
|
|
|
-// sub-slice of dst if it was large enough to hold the entire output.
|
|
|
-func Encode(dst, src []byte) ([]byte, error) {
|
|
|
-
|
|
|
- if len(src) >= MaxInputSize {
|
|
|
- return nil, ErrTooLarge
|
|
|
- }
|
|
|
-
|
|
|
- if n := CompressBound(len(src)); len(dst) < n {
|
|
|
- dst = make([]byte, n)
|
|
|
- }
|
|
|
-
|
|
|
- e := encoder{src: src, dst: dst, hashTable: make([]uint32, hashTableSize)}
|
|
|
-
|
|
|
- binary.LittleEndian.PutUint32(dst, uint32(len(src)))
|
|
|
- e.dpos = 4
|
|
|
-
|
|
|
- var (
|
|
|
- step uint32 = 1
|
|
|
- limit = incompressible
|
|
|
- )
|
|
|
-
|
|
|
- for {
|
|
|
- if int(e.pos)+12 >= len(e.src) {
|
|
|
- e.writeLiterals(uint32(len(e.src))-e.anchor, 0, e.anchor)
|
|
|
- return e.dst[:e.dpos], nil
|
|
|
- }
|
|
|
-
|
|
|
- sequence := uint32(e.src[e.pos+3])<<24 | uint32(e.src[e.pos+2])<<16 | uint32(e.src[e.pos+1])<<8 | uint32(e.src[e.pos+0])
|
|
|
-
|
|
|
- hash := (sequence * 2654435761) >> hashShift
|
|
|
- ref := e.hashTable[hash] + uninitHash
|
|
|
- e.hashTable[hash] = e.pos - uninitHash
|
|
|
-
|
|
|
- if ((e.pos-ref)>>16) != 0 || uint32(e.src[ref+3])<<24|uint32(e.src[ref+2])<<16|uint32(e.src[ref+1])<<8|uint32(e.src[ref+0]) != sequence {
|
|
|
- if e.pos-e.anchor > limit {
|
|
|
- limit <<= 1
|
|
|
- step += 1 + (step >> 2)
|
|
|
- }
|
|
|
- e.pos += step
|
|
|
- continue
|
|
|
- }
|
|
|
-
|
|
|
- if step > 1 {
|
|
|
- e.hashTable[hash] = ref - uninitHash
|
|
|
- e.pos -= step - 1
|
|
|
- step = 1
|
|
|
- continue
|
|
|
- }
|
|
|
- limit = incompressible
|
|
|
-
|
|
|
- ln := e.pos - e.anchor
|
|
|
- back := e.pos - ref
|
|
|
-
|
|
|
- anchor := e.anchor
|
|
|
-
|
|
|
- e.pos += minMatch
|
|
|
- ref += minMatch
|
|
|
- e.anchor = e.pos
|
|
|
-
|
|
|
- for int(e.pos) < len(e.src)-5 && e.src[e.pos] == e.src[ref] {
|
|
|
- e.pos++
|
|
|
- ref++
|
|
|
- }
|
|
|
-
|
|
|
- mlLen := e.pos - e.anchor
|
|
|
-
|
|
|
- e.writeLiterals(ln, mlLen, anchor)
|
|
|
- e.dst[e.dpos] = uint8(back)
|
|
|
- e.dst[e.dpos+1] = uint8(back >> 8)
|
|
|
- e.dpos += 2
|
|
|
-
|
|
|
- if mlLen > mlMask-1 {
|
|
|
- mlLen -= mlMask
|
|
|
- for mlLen > 254 {
|
|
|
- mlLen -= 255
|
|
|
-
|
|
|
- e.dst[e.dpos] = 255
|
|
|
- e.dpos++
|
|
|
- }
|
|
|
-
|
|
|
- e.dst[e.dpos] = byte(mlLen)
|
|
|
- e.dpos++
|
|
|
- }
|
|
|
-
|
|
|
- e.anchor = e.pos
|
|
|
- }
|
|
|
-}
|
|
|
+/*
|
|
|
+ * Copyright 2011-2012 Branimir Karadzic. All rights reserved.
|
|
|
+ *
|
|
|
+ * Redistribution and use in source and binary forms, with or without modification,
|
|
|
+ * are permitted provided that the following conditions are met:
|
|
|
+ *
|
|
|
+ * 1. Redistributions of source code must retain the above copyright notice, this
|
|
|
+ * list of conditions and the following disclaimer.
|
|
|
+ *
|
|
|
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
|
+ * this list of conditions and the following disclaimer in the documentation
|
|
|
+ * and/or other materials provided with the distribution.
|
|
|
+ *
|
|
|
+ * THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR
|
|
|
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
|
|
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
|
|
+ * SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
|
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
|
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
|
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
|
|
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
|
|
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
|
|
+ * THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
+ */
|
|
|
+
|
|
|
+package lz4
|
|
|
+
|
|
|
+import "encoding/binary"
|
|
|
+import "errors"
|
|
|
+
|
|
|
+const (
|
|
|
+ minMatch = 4
|
|
|
+ hashLog = 17
|
|
|
+ hashTableSize = 1 << hashLog
|
|
|
+ hashShift = (minMatch * 8) - hashLog
|
|
|
+ incompressible uint32 = 128
|
|
|
+ uninitHash = 0x88888888
|
|
|
+
|
|
|
+ // MaxInputSize is the largest buffer than can be compressed in a single block
|
|
|
+ MaxInputSize = 0x7E000000
|
|
|
+)
|
|
|
+
|
|
|
+var (
|
|
|
+ // ErrTooLarge indicates the input buffer was too large
|
|
|
+ ErrTooLarge = errors.New("input too large")
|
|
|
+)
|
|
|
+
|
|
|
+type encoder struct {
|
|
|
+ src []byte
|
|
|
+ dst []byte
|
|
|
+ hashTable []uint32
|
|
|
+ pos uint32
|
|
|
+ anchor uint32
|
|
|
+ dpos uint32
|
|
|
+}
|
|
|
+
|
|
|
+// CompressBound returns the maximum length of a lz4 block, given it's uncompressed length
|
|
|
+func CompressBound(isize int) int {
|
|
|
+ if isize > MaxInputSize {
|
|
|
+ return 0
|
|
|
+ }
|
|
|
+ return isize + ((isize) / 255) + 16 + 4
|
|
|
+}
|
|
|
+
|
|
|
+func (e *encoder) writeLiterals(length, mlLen, pos uint32) {
|
|
|
+
|
|
|
+ ln := length
|
|
|
+
|
|
|
+ var code byte
|
|
|
+ if ln > runMask-1 {
|
|
|
+ code = runMask
|
|
|
+ } else {
|
|
|
+ code = byte(ln)
|
|
|
+ }
|
|
|
+
|
|
|
+ if mlLen > mlMask-1 {
|
|
|
+ e.dst[e.dpos] = (code << mlBits) + byte(mlMask)
|
|
|
+ } else {
|
|
|
+ e.dst[e.dpos] = (code << mlBits) + byte(mlLen)
|
|
|
+ }
|
|
|
+ e.dpos++
|
|
|
+
|
|
|
+ if code == runMask {
|
|
|
+ ln -= runMask
|
|
|
+ for ; ln > 254; ln -= 255 {
|
|
|
+ e.dst[e.dpos] = 255
|
|
|
+ e.dpos++
|
|
|
+ }
|
|
|
+
|
|
|
+ e.dst[e.dpos] = byte(ln)
|
|
|
+ e.dpos++
|
|
|
+ }
|
|
|
+
|
|
|
+ for ii := uint32(0); ii < length; ii++ {
|
|
|
+ e.dst[e.dpos+ii] = e.src[pos+ii]
|
|
|
+ }
|
|
|
+
|
|
|
+ e.dpos += length
|
|
|
+}
|
|
|
+
|
|
|
+// Encode returns the encoded form of src. The returned array may be a
|
|
|
+// sub-slice of dst if it was large enough to hold the entire output.
|
|
|
+func Encode(dst, src []byte) ([]byte, error) {
|
|
|
+
|
|
|
+ if len(src) >= MaxInputSize {
|
|
|
+ return nil, ErrTooLarge
|
|
|
+ }
|
|
|
+
|
|
|
+ if n := CompressBound(len(src)); len(dst) < n {
|
|
|
+ dst = make([]byte, n)
|
|
|
+ }
|
|
|
+
|
|
|
+ e := encoder{src: src, dst: dst, hashTable: make([]uint32, hashTableSize)}
|
|
|
+
|
|
|
+ binary.LittleEndian.PutUint32(dst, uint32(len(src)))
|
|
|
+ e.dpos = 4
|
|
|
+
|
|
|
+ var (
|
|
|
+ step uint32 = 1
|
|
|
+ limit = incompressible
|
|
|
+ )
|
|
|
+
|
|
|
+ for {
|
|
|
+ if int(e.pos)+12 >= len(e.src) {
|
|
|
+ e.writeLiterals(uint32(len(e.src))-e.anchor, 0, e.anchor)
|
|
|
+ return e.dst[:e.dpos], nil
|
|
|
+ }
|
|
|
+
|
|
|
+ sequence := uint32(e.src[e.pos+3])<<24 | uint32(e.src[e.pos+2])<<16 | uint32(e.src[e.pos+1])<<8 | uint32(e.src[e.pos+0])
|
|
|
+
|
|
|
+ hash := (sequence * 2654435761) >> hashShift
|
|
|
+ ref := e.hashTable[hash] + uninitHash
|
|
|
+ e.hashTable[hash] = e.pos - uninitHash
|
|
|
+
|
|
|
+ if ((e.pos-ref)>>16) != 0 || uint32(e.src[ref+3])<<24|uint32(e.src[ref+2])<<16|uint32(e.src[ref+1])<<8|uint32(e.src[ref+0]) != sequence {
|
|
|
+ if e.pos-e.anchor > limit {
|
|
|
+ limit <<= 1
|
|
|
+ step += 1 + (step >> 2)
|
|
|
+ }
|
|
|
+ e.pos += step
|
|
|
+ continue
|
|
|
+ }
|
|
|
+
|
|
|
+ if step > 1 {
|
|
|
+ e.hashTable[hash] = ref - uninitHash
|
|
|
+ e.pos -= step - 1
|
|
|
+ step = 1
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ limit = incompressible
|
|
|
+
|
|
|
+ ln := e.pos - e.anchor
|
|
|
+ back := e.pos - ref
|
|
|
+
|
|
|
+ anchor := e.anchor
|
|
|
+
|
|
|
+ e.pos += minMatch
|
|
|
+ ref += minMatch
|
|
|
+ e.anchor = e.pos
|
|
|
+
|
|
|
+ for int(e.pos) < len(e.src)-5 && e.src[e.pos] == e.src[ref] {
|
|
|
+ e.pos++
|
|
|
+ ref++
|
|
|
+ }
|
|
|
+
|
|
|
+ mlLen := e.pos - e.anchor
|
|
|
+
|
|
|
+ e.writeLiterals(ln, mlLen, anchor)
|
|
|
+ e.dst[e.dpos] = uint8(back)
|
|
|
+ e.dst[e.dpos+1] = uint8(back >> 8)
|
|
|
+ e.dpos += 2
|
|
|
+
|
|
|
+ if mlLen > mlMask-1 {
|
|
|
+ mlLen -= mlMask
|
|
|
+ for mlLen > 254 {
|
|
|
+ mlLen -= 255
|
|
|
+
|
|
|
+ e.dst[e.dpos] = 255
|
|
|
+ e.dpos++
|
|
|
+ }
|
|
|
+
|
|
|
+ e.dst[e.dpos] = byte(mlLen)
|
|
|
+ e.dpos++
|
|
|
+ }
|
|
|
+
|
|
|
+ e.anchor = e.pos
|
|
|
+ }
|
|
|
+}
|