| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190 |
- /*
- * Copyright 2011-2012 Branimir Karadzic. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
- * SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
- * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
- package lz4
- import (
- "encoding/binary"
- "errors"
- )
- const (
- minMatch = 4
- hashLog = 17
- hashTableSize = 1 << hashLog
- hashShift = (minMatch * 8) - hashLog
- incompressible uint32 = 128
- uninitHash = 0x88888888
- // MaxInputSize is the largest buffer than can be compressed in a single block
- MaxInputSize = 0x7E000000
- )
- var (
- // ErrTooLarge indicates the input buffer was too large
- ErrTooLarge = errors.New("input too large")
- )
- type encoder struct {
- src []byte
- dst []byte
- hashTable []uint32
- pos uint32
- anchor uint32
- dpos uint32
- }
- // CompressBound returns the maximum length of a lz4 block, given it's uncompressed length
- func CompressBound(isize int) int {
- if isize > MaxInputSize {
- return 0
- }
- return isize + ((isize) / 255) + 16 + 4
- }
- func (e *encoder) writeLiterals(length, mlLen, pos uint32) {
- ln := length
- var code byte
- if ln > runMask-1 {
- code = runMask
- } else {
- code = byte(ln)
- }
- if mlLen > mlMask-1 {
- e.dst[e.dpos] = (code << mlBits) + byte(mlMask)
- } else {
- e.dst[e.dpos] = (code << mlBits) + byte(mlLen)
- }
- e.dpos++
- if code == runMask {
- ln -= runMask
- for ; ln > 254; ln -= 255 {
- e.dst[e.dpos] = 255
- e.dpos++
- }
- e.dst[e.dpos] = byte(ln)
- e.dpos++
- }
- for ii := uint32(0); ii < length; ii++ {
- e.dst[e.dpos+ii] = e.src[pos+ii]
- }
- e.dpos += length
- }
- // Encode returns the encoded form of src. The returned array may be a
- // sub-slice of dst if it was large enough to hold the entire output.
- func Encode(dst, src []byte) ([]byte, error) {
- if len(src) >= MaxInputSize {
- return nil, ErrTooLarge
- }
- if n := CompressBound(len(src)); len(dst) < n {
- dst = make([]byte, n)
- }
- e := encoder{src: src, dst: dst, hashTable: make([]uint32, hashTableSize)}
- binary.LittleEndian.PutUint32(dst, uint32(len(src)))
- e.dpos = 4
- var (
- step uint32 = 1
- limit = incompressible
- )
- for {
- if int(e.pos)+12 >= len(e.src) {
- e.writeLiterals(uint32(len(e.src))-e.anchor, 0, e.anchor)
- return e.dst[:e.dpos], nil
- }
- sequence := uint32(e.src[e.pos+3])<<24 | uint32(e.src[e.pos+2])<<16 | uint32(e.src[e.pos+1])<<8 | uint32(e.src[e.pos+0])
- hash := (sequence * 2654435761) >> hashShift
- ref := e.hashTable[hash] + uninitHash
- e.hashTable[hash] = e.pos - uninitHash
- if ((e.pos-ref)>>16) != 0 || uint32(e.src[ref+3])<<24|uint32(e.src[ref+2])<<16|uint32(e.src[ref+1])<<8|uint32(e.src[ref+0]) != sequence {
- if e.pos-e.anchor > limit {
- limit <<= 1
- step += 1 + (step >> 2)
- }
- e.pos += step
- continue
- }
- if step > 1 {
- e.hashTable[hash] = ref - uninitHash
- e.pos -= step - 1
- step = 1
- continue
- }
- limit = incompressible
- ln := e.pos - e.anchor
- back := e.pos - ref
- anchor := e.anchor
- e.pos += minMatch
- ref += minMatch
- e.anchor = e.pos
- for int(e.pos) < len(e.src)-5 && e.src[e.pos] == e.src[ref] {
- e.pos++
- ref++
- }
- mlLen := e.pos - e.anchor
- e.writeLiterals(ln, mlLen, anchor)
- e.dst[e.dpos] = uint8(back)
- e.dst[e.dpos+1] = uint8(back >> 8)
- e.dpos += 2
- if mlLen > mlMask-1 {
- mlLen -= mlMask
- for mlLen > 254 {
- mlLen -= 255
- e.dst[e.dpos] = 255
- e.dpos++
- }
- e.dst[e.dpos] = byte(mlLen)
- e.dpos++
- }
- e.anchor = e.pos
- }
- }
|