vendor: Add github.com/cznic/lldb and friends (new recursive dependency)

Jakob Borg 9 years ago
parent
commit
81d19a00aa

+ 27 - 0
vendor/github.com/cznic/internal/buffer/LICENSE

@@ -0,0 +1,27 @@
+Copyright (c) 2016 The Internal Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the names of the authors nor the names of the
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ 55 - 0
vendor/github.com/cznic/internal/buffer/buffer.go

@@ -0,0 +1,55 @@
+// Copyright 2016 The Internal Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package buffer implements a pool of pointers to byte slices.
+//
+// Example usage pattern
+//
+//	p := buffer.Get(size)
+//	b := *p	// Now you can use b in any way you need.
+//	...
+//	// When b will not be used anymore
+//	buffer.Put(p)
+//	...
+//	// If b or p are not going out of scope soon, optionally
+//	b = nil
+//	p = nil
+//
+// Otherwise the pool cannot release the buffer on garbage collection.
+//
+// Do not do
+//
+//	p := buffer.Get(size)
+//	b := *p
+//	...
+//	buffer.Put(&b)
+//
+// or
+//
+//	b := *buffer.Get(size)
+//	...
+//	buffer.Put(&b)
+package buffer
+
+import (
+	"github.com/cznic/internal/slice"
+)
+
+// CGet returns a pointer to a byte slice of len size. The pointed to byte
+// slice is zeroed up to its cap. CGet panics for size < 0.
+//
+// CGet is safe for concurrent use by multiple goroutines.
+func CGet(size int) *[]byte { return slice.Bytes.CGet(size).(*[]byte) }
+
+// Get returns a pointer to a byte slice of len size. The pointed to byte slice
+// is not zeroed. Get panics for size < 0.
+//
+// Get is safe for concurrent use by multiple goroutines.
+func Get(size int) *[]byte { return slice.Bytes.Get(size).(*[]byte) }
+
+// Put puts a pointer to a byte slice into a pool for possible later reuse by
+// CGet or Get.
+//
+// Put is safe for concurrent use by multiple goroutines.
+func Put(p *[]byte) { slice.Bytes.Put(p) }
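
For reference, a minimal usage sketch of this buffer pool (a hypothetical example, not part of the vendored code; it assumes the package is importable at the path above):

	package main

	import (
		"fmt"

		"github.com/cznic/internal/buffer"
	)

	func main() {
		p := buffer.Get(1024) // *[]byte with len 1024; contents are not zeroed
		b := *p
		copy(b, "hello")
		fmt.Println(string(b[:5])) // hello
		buffer.Put(p)              // return the buffer to the pool for reuse
	}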

+ 27 - 0
vendor/github.com/cznic/internal/file/LICENSE

@@ -0,0 +1,27 @@
+Copyright (c) 2016 The Internal Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the names of the authors nor the names of the
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ 434 - 0
vendor/github.com/cznic/internal/file/file.go

@@ -0,0 +1,434 @@
+// Copyright 2016 The Internal Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package file provides an os.File-like interface of a memory mapped file.
+package file
+
+import (
+	"fmt"
+	"io"
+	"os"
+	"time"
+
+	"github.com/cznic/fileutil"
+	"github.com/cznic/internal/buffer"
+	"github.com/cznic/mathutil"
+	"github.com/edsrzf/mmap-go"
+)
+
+const copyBufSize = 1 << 20 // 1 MB.
+
+var (
+	_ Interface = (*mem)(nil)
+	_ Interface = (*file)(nil)
+
+	_ os.FileInfo = stat{}
+
+	sysPage = os.Getpagesize()
+)
+
+// Interface is a os.File-like entity.
+type Interface interface {
+	io.ReaderAt
+	io.ReaderFrom
+	io.WriterAt
+	io.WriterTo
+
+	Close() error
+	Stat() (os.FileInfo, error)
+	Sync() error
+	Truncate(int64) error
+}
+
+// Open returns a new Interface backed by f, or an error, if any.
+func Open(f *os.File) (Interface, error) { return newFile(f, 1<<30, 20) }
+
+// OpenMem returns a new Interface, or an error, if any. The Interface content
+// is volatile; it is backed only by the process's memory.
+func OpenMem(name string) (Interface, error) { return newMem(name, 18), nil }
+
+type memMap map[int64]*[]byte
+
+type mem struct {
+	m       memMap
+	modTime time.Time
+	name    string
+	pgBits  uint
+	pgMask  int
+	pgSize  int
+	size    int64
+}
+
+func newMem(name string, pgBits uint) *mem {
+	pgSize := 1 << pgBits
+	return &mem{
+		m:       memMap{},
+		modTime: time.Now(),
+		name:    name,
+		pgBits:  pgBits,
+		pgMask:  pgSize - 1,
+		pgSize:  pgSize,
+	}
+}
+
+func (f *mem) IsDir() bool                               { return false }
+func (f *mem) Mode() os.FileMode                         { return os.ModeTemporary + 0600 }
+func (f *mem) ModTime() time.Time                        { return f.modTime }
+func (f *mem) Name() string                              { return f.name }
+func (f *mem) ReadFrom(r io.Reader) (n int64, err error) { return readFrom(f, r) }
+func (f *mem) Size() (n int64)                           { return f.size }
+func (f *mem) Stat() (os.FileInfo, error)                { return f, nil }
+func (f *mem) Sync() error                               { return nil }
+func (f *mem) Sys() interface{}                          { return nil }
+func (f *mem) WriteTo(w io.Writer) (n int64, err error)  { return writeTo(f, w) }
+
+func (f *mem) Close() error {
+	f.Truncate(0)
+	f.m = nil
+	return nil
+}
+
+func (f *mem) ReadAt(b []byte, off int64) (n int, err error) {
+	avail := f.size - off
+	pi := off >> f.pgBits
+	po := int(off) & f.pgMask
+	rem := len(b)
+	if int64(rem) >= avail {
+		rem = int(avail)
+		err = io.EOF
+	}
+	var zeroPage *[]byte
+	for rem != 0 && avail > 0 {
+		pg := f.m[pi]
+		if pg == nil {
+			if zeroPage == nil {
+				zeroPage = buffer.CGet(f.pgSize)
+				defer buffer.Put(zeroPage)
+			}
+			pg = zeroPage
+		}
+		nc := copy(b[:mathutil.Min(rem, f.pgSize)], (*pg)[po:])
+		pi++
+		po = 0
+		rem -= nc
+		n += nc
+		b = b[nc:]
+	}
+	return n, err
+}
+
+func (f *mem) Truncate(size int64) (err error) {
+	if size < 0 {
+		return fmt.Errorf("invalid truncate size: %d", size)
+	}
+
+	first := size >> f.pgBits
+	if size&int64(f.pgMask) != 0 {
+		first++
+	}
+	last := f.size >> f.pgBits
+	if f.size&int64(f.pgMask) != 0 {
+		last++
+	}
+	for ; first <= last; first++ {
+		if p := f.m[first]; p != nil {
+			buffer.Put(p)
+		}
+		delete(f.m, first)
+	}
+
+	f.size = size
+	return nil
+}
+
+func (f *mem) WriteAt(b []byte, off int64) (n int, err error) {
+	pi := off >> f.pgBits
+	po := int(off) & f.pgMask
+	n = len(b)
+	rem := n
+	var nc int
+	for rem != 0 {
+		pg := f.m[pi]
+		if pg == nil {
+			pg = buffer.CGet(f.pgSize)
+			f.m[pi] = pg
+		}
+		nc = copy((*pg)[po:], b)
+		pi++
+		po = 0
+		rem -= nc
+		b = b[nc:]
+	}
+	f.size = mathutil.MaxInt64(f.size, off+int64(n))
+	return n, nil
+}
+
+type stat struct {
+	os.FileInfo
+	size int64
+}
+
+func (s stat) Size() int64 { return s.size }
+
+type fileMap map[int64]mmap.MMap
+
+type file struct {
+	f        *os.File
+	m        fileMap
+	maxPages int
+	pgBits   uint
+	pgMask   int
+	pgSize   int
+	size     int64
+	fsize    int64
+}
+
+func newFile(f *os.File, maxSize int64, pgBits uint) (*file, error) {
+	if maxSize < 0 {
+		panic("internal error")
+	}
+
+	pgSize := 1 << pgBits
+	switch {
+	case sysPage > pgSize:
+		pgBits = uint(mathutil.Log2Uint64(uint64(sysPage)))
+	default:
+		pgBits = uint(mathutil.Log2Uint64(uint64(pgSize / sysPage * sysPage)))
+	}
+	pgSize = 1 << pgBits
+	fi := &file{
+		f: f,
+		m: fileMap{},
+		maxPages: int(mathutil.MinInt64(
+			1024,
+			mathutil.MaxInt64(maxSize/int64(pgSize), 1)),
+		),
+		pgBits: pgBits,
+		pgMask: pgSize - 1,
+		pgSize: pgSize,
+	}
+	info, err := f.Stat()
+	if err != nil {
+		return nil, err
+	}
+
+	if err = fi.Truncate(info.Size()); err != nil {
+		return nil, err
+	}
+
+	return fi, nil
+}
+
+func (f *file) ReadFrom(r io.Reader) (n int64, err error) { return readFrom(f, r) }
+func (f *file) Sync() (err error)                         { return f.f.Sync() }
+func (f *file) WriteTo(w io.Writer) (n int64, err error)  { return writeTo(f, w) }
+
+func (f *file) Close() (err error) {
+	for _, p := range f.m {
+		if err = p.Unmap(); err != nil {
+			return err
+		}
+	}
+
+	if err = f.f.Truncate(f.size); err != nil {
+		return err
+	}
+
+	if err = f.f.Sync(); err != nil {
+		return err
+	}
+
+	if err = f.f.Close(); err != nil {
+		return err
+	}
+
+	f.m = nil
+	f.f = nil
+	return nil
+}
+
+func (f *file) page(index int64) (mmap.MMap, error) {
+	if len(f.m) == f.maxPages {
+		for i, p := range f.m {
+			if err := p.Unmap(); err != nil {
+				return nil, err
+			}
+
+			delete(f.m, i)
+			break
+		}
+	}
+
+	off := index << f.pgBits
+	fsize := off + int64(f.pgSize)
+	if fsize > f.fsize {
+		if err := f.f.Truncate(fsize); err != nil {
+			return nil, err
+		}
+
+		f.fsize = fsize
+	}
+	p, err := mmap.MapRegion(f.f, f.pgSize, mmap.RDWR, 0, off)
+	if err != nil {
+		return nil, err
+	}
+
+	f.m[index] = p
+	return p, nil
+}
+
+func (f *file) ReadAt(b []byte, off int64) (n int, err error) {
+	avail := f.size - off
+	pi := off >> f.pgBits
+	po := int(off) & f.pgMask
+	rem := len(b)
+	if int64(rem) >= avail {
+		rem = int(avail)
+		err = io.EOF
+	}
+	for rem != 0 && avail > 0 {
+		pg := f.m[pi]
+		if pg == nil {
+			if pg, err = f.page(pi); err != nil {
+				return n, err
+			}
+		}
+		nc := copy(b[:mathutil.Min(rem, f.pgSize)], pg[po:])
+		pi++
+		po = 0
+		rem -= nc
+		n += nc
+		b = b[nc:]
+	}
+	return n, err
+}
+
+func (f *file) Stat() (os.FileInfo, error) {
+	fi, err := f.f.Stat()
+	if err != nil {
+		return nil, err
+	}
+
+	return stat{fi, f.size}, nil
+}
+
+func (f *file) Truncate(size int64) (err error) {
+	if size < 0 {
+		return fmt.Errorf("invalid truncate size: %d", size)
+	}
+
+	first := size >> f.pgBits
+	if size&int64(f.pgMask) != 0 {
+		first++
+	}
+	last := f.size >> f.pgBits
+	if f.size&int64(f.pgMask) != 0 {
+		last++
+	}
+	for ; first <= last; first++ {
+		if p := f.m[first]; p != nil {
+			if err := p.Unmap(); err != nil {
+				return err
+			}
+		}
+
+		delete(f.m, first)
+	}
+
+	f.size = size
+	fsize := (size + int64(f.pgSize) - 1) &^ int64(f.pgMask)
+	if fsize != f.fsize {
+		if err := f.f.Truncate(fsize); err != nil {
+			return err
+		}
+
+	}
+	f.fsize = fsize
+	return nil
+}
+
+func (f *file) WriteAt(b []byte, off int64) (n int, err error) {
+	pi := off >> f.pgBits
+	po := int(off) & f.pgMask
+	n = len(b)
+	rem := n
+	var nc int
+	for rem != 0 {
+		pg := f.m[pi]
+		if pg == nil {
+			pg, err = f.page(pi)
+			if err != nil {
+				return n, err
+			}
+		}
+		nc = copy(pg[po:], b)
+		pi++
+		po = 0
+		rem -= nc
+		b = b[nc:]
+	}
+	f.size = mathutil.MaxInt64(f.size, off+int64(n))
+	return n, nil
+}
+
+// ----------------------------------------------------------------------------
+
+func readFrom(f Interface, r io.Reader) (n int64, err error) {
+	f.Truncate(0)
+	p := buffer.Get(copyBufSize)
+	b := *p
+	defer buffer.Put(p)
+
+	var off int64
+	var werr error
+	for {
+		rn, rerr := r.Read(b)
+		if rn != 0 {
+			_, werr = f.WriteAt(b[:rn], off)
+			n += int64(rn)
+			off += int64(rn)
+		}
+		if rerr != nil {
+			if !fileutil.IsEOF(rerr) {
+				err = rerr
+			}
+			break
+		}
+
+		if werr != nil {
+			err = werr
+			break
+		}
+	}
+	return n, err
+}
+
+func writeTo(f Interface, w io.Writer) (n int64, err error) {
+	p := buffer.Get(copyBufSize)
+	b := *p
+	defer buffer.Put(p)
+
+	var off int64
+	var werr error
+	for {
+		rn, rerr := f.ReadAt(b, off)
+		if rn != 0 {
+			_, werr = w.Write(b[:rn])
+			n += int64(rn)
+			off += int64(rn)
+		}
+		if rerr != nil {
+			if !fileutil.IsEOF(rerr) {
+				err = rerr
+			}
+			break
+		}
+
+		if werr != nil {
+			err = werr
+			break
+		}
+	}
+	return n, err
+}
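
A minimal sketch of the memory-backed Interface in use (a hypothetical example, not part of the vendored code):

	package main

	import (
		"fmt"
		"io"

		"github.com/cznic/internal/file"
	)

	func main() {
		f, err := file.OpenMem("scratch")
		if err != nil {
			panic(err)
		}
		defer f.Close()

		// Writing past the current size grows the file; the gap reads as zeros.
		if _, err := f.WriteAt([]byte("hello"), 100); err != nil {
			panic(err)
		}

		b := make([]byte, 5)
		n, err := f.ReadAt(b, 100)
		if err != nil && err != io.EOF { // io.EOF here only means we read up to the end
			panic(err)
		}
		fmt.Println(string(b[:n])) // hello

		fi, _ := f.Stat()
		fmt.Println(fi.Size()) // 105
	}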

+ 27 - 0
vendor/github.com/cznic/internal/slice/LICENSE

@@ -0,0 +1,27 @@
+Copyright (c) 2016 The Internal Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the names of the authors nor the names of the
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ 173 - 0
vendor/github.com/cznic/internal/slice/pool.go

@@ -0,0 +1,173 @@
+// Copyright 2016 The Internal Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package slice implements pools of pointers to slices.
+package slice
+
+import (
+	"sync"
+
+	"github.com/cznic/mathutil"
+)
+
+var (
+	// Bytes is a ready to use *[]byte Pool.
+	Bytes *Pool
+	// Ints is a ready to use *[]int Pool.
+	Ints *Pool
+)
+
+func init() {
+	Bytes = newBytes()
+	Ints = NewPool(
+		func(size int) interface{} { // create
+			b := make([]int, size)
+			return &b
+		},
+		func(s interface{}) { // clear
+			b := *s.(*[]int)
+			b = b[:cap(b)]
+			for i := range b {
+				b[i] = 0
+			}
+		},
+		func(s interface{}, size int) { // setSize
+			p := s.(*[]int)
+			*p = (*p)[:size]
+		},
+		func(s interface{}) int { return cap(*s.(*[]int)) }, // cap
+	)
+}
+
+func newBytes() *Pool {
+	return NewPool(
+		func(size int) interface{} { // create
+			b := make([]byte, size)
+			return &b
+		},
+		func(s interface{}) { // clear
+			b := *s.(*[]byte)
+			b = b[:cap(b)]
+			for i := range b {
+				b[i] = 0
+			}
+		},
+		func(s interface{}, size int) { // setSize
+			p := s.(*[]byte)
+			*p = (*p)[:size]
+		},
+		func(s interface{}) int { return cap(*s.(*[]byte)) }, // cap
+	)
+}
+
+// Pool implements a pool of pointers to slices.
+//
+// Example usage pattern (assuming pool is, for example, a *[]byte Pool)
+//
+//	p := pool.Get(size).(*[]byte)
+//	b := *p	// Now you can use b in any way you need.
+//	...
+//	// When b will not be used anymore
+//	pool.Put(p)
+//	...
+//	// If b or p are not going out of scope soon, optionally
+//	b = nil
+//	p = nil
+//
+// Otherwise the pool cannot release the slice on garbage collection.
+//
+// Do not do
+//
+//	p := pool.Get(size).(*[]byte)
+//	b := *p
+//	...
+//	pool.Put(&b)
+//
+// or
+//
+//	b := *pool.Get(size).(*[]byte)
+//	...
+//	pool.Put(&b)
+type Pool struct {
+	cap     func(interface{}) int
+	clear   func(interface{})
+	m       [63]sync.Pool
+	null    interface{}
+	setSize func(interface{}, int)
+}
+
+// NewPool returns a newly created Pool. Assuming the desired slice type is
+// []T:
+//
+// The create function returns a *[]T of len == cap == size.
+//
+// The argument of clear is *[]T and the function sets all the slice elements
+// to the respective zero value.
+//
+// The setSize function gets a *[]T and sets its len to size.
+//
+// The cap function gets a *[]T and returns its capacity.
+func NewPool(
+	create func(size int) interface{},
+	clear func(interface{}),
+	setSize func(p interface{}, size int),
+	cap func(p interface{}) int,
+) *Pool {
+	p := &Pool{clear: clear, setSize: setSize, cap: cap, null: create(0)}
+	for i := range p.m {
+		size := 1 << uint(i)
+		p.m[i] = sync.Pool{New: func() interface{} {
+			// 0:     1 -      1
+			// 1:    10 -     10
+			// 2:    11 -    100
+			// 3:   101 -   1000
+			// 4:  1001 -  10000
+			// 5: 10001 - 100000
+			return create(size)
+		}}
+	}
+	return p
+}
+
+// CGet returns a *[]T of len size. The pointed to slice is zeroed up to its
+// cap. CGet panics for size < 0.
+//
+// CGet is safe for concurrent use by multiple goroutines.
+func (p *Pool) CGet(size int) interface{} {
+	s := p.Get(size)
+	p.clear(s)
+	return s
+}
+
+// Get returns a *[]T of len size. The pointed to slice is not zeroed. Get
+// panics for size < 0.
+//
+// Get is safe for concurrent use by multiple goroutines.
+func (p *Pool) Get(size int) interface{} {
+	var index int
+	switch {
+	case size < 0:
+		panic("Pool.Get: negative size")
+	case size == 0:
+		return p.null
+	case size > 1:
+		index = mathutil.Log2Uint64(uint64(size-1)) + 1
+	}
+	s := p.m[index].Get()
+	p.setSize(s, size)
+	return s
+}
+
+// Put puts a *[]T into a pool for possible later reuse by CGet or Get. Put
+// panics is its argument is not of type *[]T.
+//
+// Put is safe for concurrent use by multiple goroutines.
+func (p *Pool) Put(b interface{}) {
+	size := p.cap(b)
+	if size == 0 {
+		return
+	}
+
+	p.m[mathutil.Log2Uint64(uint64(size))].Put(b)
+}
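
A minimal sketch of the ready-made Ints pool in use (a hypothetical example, not part of the vendored code):

	package main

	import (
		"fmt"

		"github.com/cznic/internal/slice"
	)

	func main() {
		p := slice.Ints.CGet(10).(*[]int) // *[]int with len 10, zeroed up to cap
		s := *p
		for i := range s {
			s[i] = i * i
		}
		fmt.Println(s)
		slice.Ints.Put(p) // hand the slice back to the pool for reuse
	}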

+ 324 - 0
vendor/github.com/cznic/lldb/2pc.go

@@ -0,0 +1,324 @@
+// Copyright 2014 The lldb Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Two Phase Commit & Structural ACID
+
+package lldb
+
+import (
+	"bufio"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"os"
+
+	"github.com/cznic/fileutil"
+	"github.com/cznic/mathutil"
+)
+
+var _ Filer = &ACIDFiler0{} // Ensure ACIDFiler0 is a Filer
+
+type acidWrite struct {
+	b   []byte
+	off int64
+}
+
+type acidWriter0 ACIDFiler0
+
+func (a *acidWriter0) WriteAt(b []byte, off int64) (n int, err error) {
+	f := (*ACIDFiler0)(a)
+	if f.newEpoch {
+		f.newEpoch = false
+		f.data = f.data[:0]
+		if err = a.writePacket([]interface{}{wpt00Header, walTypeACIDFiler0, ""}); err != nil {
+			return
+		}
+	}
+
+	if err = a.writePacket([]interface{}{wpt00WriteData, b, off}); err != nil {
+		return
+	}
+
+	f.data = append(f.data, acidWrite{b, off})
+	return len(b), nil
+}
+
+func (a *acidWriter0) writePacket(items []interface{}) (err error) {
+	f := (*ACIDFiler0)(a)
+	b, err := EncodeScalars(items...)
+	if err != nil {
+		return
+	}
+
+	var b4 [4]byte
+	binary.BigEndian.PutUint32(b4[:], uint32(len(b)))
+	if _, err = f.bwal.Write(b4[:]); err != nil {
+		return
+	}
+
+	if _, err = f.bwal.Write(b); err != nil {
+		return
+	}
+
+	if m := (4 + len(b)) % 16; m != 0 {
+		var pad [15]byte
+		_, err = f.bwal.Write(pad[:16-m])
+	}
+	return
+}
+
+// WAL Packet Tags
+const (
+	wpt00Header = iota
+	wpt00WriteData
+	wpt00Checkpoint
+)
+
+const (
+	walTypeACIDFiler0 = iota
+)
+
+// ACIDFiler0 is a very simple, synchronous implementation of 2PC. It uses a
+// single write ahead log file to provide the structural atomicity
+// (BeginUpdate/EndUpdate/Rollback) and durability (DB can be recovered from
+// WAL if a crash occurred).
+//
+// ACIDFiler0 is a Filer.
+//
+// NOTE: Durable synchronous 2PC involves three fsyncs in this implementation
+// (WAL, DB, zero truncated WAL).  Where possible, it's recommended to collect
+// transactions for, say, one second before performing the two phase commit, as
+// the typical performance of rotational hard disks is a few tens of fsyncs per
+// second at most. For an example of such a collective transaction approach
+// please see the collecting FSM STT in Dbm's documentation[1].
+//
+//  [1]: http://godoc.org/github.com/cznic/exp/dbm
+type ACIDFiler0 struct {
+	*RollbackFiler
+	bwal              *bufio.Writer
+	data              []acidWrite
+	newEpoch          bool
+	peakBitFilerPages int   // track maximum transaction memory
+	peakWal           int64 // tracks WAL maximum used size
+	testHook          bool  // keeps WAL untruncated (once)
+	wal               *os.File
+}
+
+// NewACIDFiler returns a newly created ACIDFiler0 with WAL in wal.
+//
+// If the WAL is zero sized then a previous clean shutdown of db is taken for
+// granted and no recovery procedure is taken.
+//
+// If the WAL is of non zero size then it is checked for a committed/fully
+// finished transaction that has not yet been reflected in db. If such a
+// transaction exists it is committed to db. If the recovery process finishes
+// successfully, the WAL is truncated to zero size and fsync'ed prior to return
+// from NewACIDFiler.
+func NewACIDFiler(db Filer, wal *os.File) (r *ACIDFiler0, err error) {
+	fi, err := wal.Stat()
+	if err != nil {
+		return
+	}
+
+	r = &ACIDFiler0{wal: wal}
+
+	if fi.Size() != 0 {
+		if err = r.recoverDb(db); err != nil {
+			return
+		}
+	}
+
+	r.bwal = bufio.NewWriter(r.wal)
+	r.newEpoch = true
+	acidWriter := (*acidWriter0)(r)
+
+	if r.RollbackFiler, err = NewRollbackFiler(
+		db,
+		func(sz int64) (err error) {
+			// Checkpoint
+			if err = acidWriter.writePacket([]interface{}{wpt00Checkpoint, sz}); err != nil {
+				return
+			}
+
+			if err = r.bwal.Flush(); err != nil {
+				return
+			}
+
+			if err = r.wal.Sync(); err != nil {
+				return
+			}
+
+			wfi, err := r.wal.Stat()
+			if err == nil {
+				r.peakWal = mathutil.MaxInt64(wfi.Size(), r.peakWal)
+			}
+
+			// Phase 1 commit complete
+
+			for _, v := range r.data {
+				if _, err := db.WriteAt(v.b, v.off); err != nil {
+					return err
+				}
+			}
+
+			if err = db.Truncate(sz); err != nil {
+				return
+			}
+
+			if err = db.Sync(); err != nil {
+				return
+			}
+
+			// Phase 2 commit complete
+
+			if !r.testHook {
+				if err = r.wal.Truncate(0); err != nil {
+					return
+				}
+
+				if _, err = r.wal.Seek(0, 0); err != nil {
+					return
+				}
+			}
+
+			r.testHook = false
+			r.bwal.Reset(r.wal)
+			r.newEpoch = true
+			return r.wal.Sync()
+
+		},
+		acidWriter,
+	); err != nil {
+		return
+	}
+
+	return r, nil
+}
+
+// PeakWALSize reports the maximum size WAL has ever used.
+func (a ACIDFiler0) PeakWALSize() int64 {
+	return a.peakWal
+}
+
+func (a *ACIDFiler0) readPacket(f *bufio.Reader) (items []interface{}, err error) {
+	var b4 [4]byte
+	n, err := io.ReadAtLeast(f, b4[:], 4)
+	if n != 4 {
+		return
+	}
+
+	ln := int(binary.BigEndian.Uint32(b4[:]))
+	m := (4 + ln) % 16
+	padd := (16 - m) % 16
+	b := make([]byte, ln+padd)
+	if n, err = io.ReadAtLeast(f, b, len(b)); n != len(b) {
+		return
+	}
+
+	return DecodeScalars(b[:ln])
+}
+
+func (a *ACIDFiler0) recoverDb(db Filer) (err error) {
+	fi, err := a.wal.Stat()
+	if err != nil {
+		return &ErrILSEQ{Type: ErrInvalidWAL, Name: a.wal.Name(), More: err}
+	}
+
+	if sz := fi.Size(); sz%16 != 0 {
+		return &ErrILSEQ{Type: ErrFileSize, Name: a.wal.Name(), Arg: sz}
+	}
+
+	f := bufio.NewReader(a.wal)
+	items, err := a.readPacket(f)
+	if err != nil {
+		return
+	}
+
+	if len(items) != 3 || items[0] != int64(wpt00Header) || items[1] != int64(walTypeACIDFiler0) {
+		return &ErrILSEQ{Type: ErrInvalidWAL, Name: a.wal.Name(), More: fmt.Sprintf("invalid packet items %#v", items)}
+	}
+
+	tr := NewBTree(nil)
+
+	for {
+		items, err = a.readPacket(f)
+		if err != nil {
+			return
+		}
+
+		if len(items) < 2 {
+			return &ErrILSEQ{Type: ErrInvalidWAL, Name: a.wal.Name(), More: fmt.Sprintf("too few packet items %#v", items)}
+		}
+
+		switch items[0] {
+		case int64(wpt00WriteData):
+			if len(items) != 3 {
+				return &ErrILSEQ{Type: ErrInvalidWAL, Name: a.wal.Name(), More: fmt.Sprintf("invalid data packet items %#v", items)}
+			}
+
+			b, off := items[1].([]byte), items[2].(int64)
+			var key [8]byte
+			binary.BigEndian.PutUint64(key[:], uint64(off))
+			if err = tr.Set(key[:], b); err != nil {
+				return
+			}
+		case int64(wpt00Checkpoint):
+			var b1 [1]byte
+			if n, err := f.Read(b1[:]); n != 0 || err == nil {
+				return &ErrILSEQ{Type: ErrInvalidWAL, Name: a.wal.Name(), More: fmt.Sprintf("checkpoint n %d, err %v", n, err)}
+			}
+
+			if len(items) != 2 {
+				return &ErrILSEQ{Type: ErrInvalidWAL, Name: a.wal.Name(), More: fmt.Sprintf("checkpoint packet invalid items %#v", items)}
+			}
+
+			sz := items[1].(int64)
+			enum, err := tr.seekFirst()
+			if err != nil {
+				return err
+			}
+
+			for {
+				k, v, err := enum.current()
+				if err != nil {
+					if fileutil.IsEOF(err) {
+						break
+					}
+
+					return err
+				}
+
+				if _, err = db.WriteAt(v, int64(binary.BigEndian.Uint64(k))); err != nil {
+					return err
+				}
+
+				if err = enum.next(); err != nil {
+					if fileutil.IsEOF(err) {
+						break
+					}
+
+					return err
+				}
+			}
+
+			if err = db.Truncate(sz); err != nil {
+				return err
+			}
+
+			if err = db.Sync(); err != nil {
+				return err
+			}
+
+			// Recovery complete
+
+			if err = a.wal.Truncate(0); err != nil {
+				return err
+			}
+
+			return a.wal.Sync()
+		default:
+			return &ErrILSEQ{Type: ErrInvalidWAL, Name: a.wal.Name(), More: fmt.Sprintf("packet tag %v", items[0])}
+		}
+	}
+}

+ 44 - 0
vendor/github.com/cznic/lldb/2pc_docs.go

@@ -0,0 +1,44 @@
+// Copyright 2014 The lldb Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+
+Anatomy of a WAL file
+
+WAL file
+	A sequence of packets
+
+WAL packet, parts in slice notation
+	[0:4],   4 bytes:        N uint32        // network byte order
+	[4:4+N], N bytes:        payload []byte  // gb encoded scalars
+
+Packets, including the 4 byte 'size' prefix, MUST BE padded to size == 0 (mod
+16). The values of the padding bytes MUST BE zero.
+
+The first item of the encoded scalars is a packet type number (packet tag). The
+meaning of any other item(s) of the payload depends on the packet tag.
+
+Packet definitions
+
+	{wpt00Header int, typ int, s string}
+		typ:	Must be zero (ACIDFiler0 file).
+		s:	Any comment string, empty string is okay.
+
+		This packet must be present only once - as the first packet of
+		a WAL file.
+
+	{wpt00WriteData int, b []byte, off int64}
+		Write data (WriteAt(b, off)).
+
+	{wpt00Checkpoint int, sz int64}
+		Checkpoint (Truncate(sz)).
+
+		This packet must be present only once - as the last packet of
+		a WAL file.
+
+*/
+
+package lldb
+
+//TODO optimize bitfiler/wal/2pc data above final size
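
A sketch of the framing rule described above (a hypothetical helper, not part of the vendored code): a 4-byte big-endian length, the payload, then zero padding so the whole packet is a multiple of 16 bytes.

	package walframe

	import "encoding/binary"

	// framePacket wraps an already gb-encoded payload into one WAL packet:
	// 4-byte big-endian length, payload bytes, zero padding to a 16-byte boundary.
	func framePacket(payload []byte) []byte {
		n := 4 + len(payload)
		pad := (16 - n%16) % 16
		out := make([]byte, n+pad) // trailing pad bytes stay zero
		binary.BigEndian.PutUint32(out[:4], uint32(len(payload)))
		copy(out[4:], payload)
		return out
	}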

+ 27 - 0
vendor/github.com/cznic/lldb/LICENSE

@@ -0,0 +1,27 @@
+Copyright (c) 2014 The lldb Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the names of the authors nor the names of the
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ 2344 - 0
vendor/github.com/cznic/lldb/btree.go

@@ -0,0 +1,2344 @@
+// Copyright 2014 The lldb Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lldb
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"sort"
+	"strings"
+
+	"github.com/cznic/fileutil"
+	"github.com/cznic/internal/buffer"
+	"github.com/cznic/sortutil"
+)
+
+const (
+	kData             = 256         // [1, 512]
+	kIndex            = 256         // [2, 2048]
+	kKV               = 19          // Size of the key/value field in btreeDataPage
+	kSz               = kKV - 1 - 7 // Content prefix size
+	kH                = kKV - 7     // Content field offset for handle
+	tagBTreeDataPage  = 1
+	tagBTreeIndexPage = 0
+)
+
+// BTree is a B+tree[1][2], i.e. a variant which speeds up
+// enumeration/iteration of the BTree. Depending on how it was created, it can
+// be volatile (backed only by memory) or non-volatile (backed by a non-volatile
+// Allocator).
+//
+// The specific implementation of BTrees in this package are B+trees with
+// delayed split/concatenation (discussed in e.g. [3]).
+//
+// Note: No BTree method returns io.EOF for physical Filer reads/writes. The
+// io.EOF is returned only by bTreeEnumerator methods to indicate "no more K-V
+// pair".
+//
+//  [1]: http://en.wikipedia.org/wiki/B+tree
+//  [2]: http://zgking.com:8080/home/donghui/publications/books/dshandbook_BTree.pdf
+//  [3]: http://people.cs.aau.dk/~simas/aalg06/UbiquitBtree.pdf
+type BTree struct {
+	store   btreeStore
+	root    btree
+	collate func(a, b []byte) int
+	serial  uint64
+}
+
+// NewBTree returns a new, memory-only BTree.
+func NewBTree(collate func(a, b []byte) int) *BTree {
+	store := newMemBTreeStore()
+	root, err := newBTree(store)
+	if err != nil { // should not happen
+		panic(err.Error())
+	}
+
+	return &BTree{store, root, collate, 0}
+}
+
+// IsMem reports if t is a memory only BTree.
+func (t *BTree) IsMem() (r bool) {
+	_, r = t.store.(*memBTreeStore)
+	return
+}
+
+// Clear empties the tree.
+func (t *BTree) Clear() (err error) {
+	if t == nil {
+		err = errors.New("BTree method invoked on nil receiver")
+		return
+	}
+
+	t.serial++
+	return t.root.clear(t.store)
+}
+
+// Delete deletes key and its associated value from the tree.
+func (t *BTree) Delete(key []byte) (err error) {
+	if t == nil {
+		err = errors.New("BTree method invoked on nil receiver")
+		return
+	}
+
+	t.serial++
+	_, err = t.root.extract(t.store, nil, t.collate, key)
+	return
+}
+
+// DeleteAny deletes one key and its associated value from the tree. If the
+// tree is empty on return then empty is true.
+func (t *BTree) DeleteAny() (empty bool, err error) {
+	if t == nil {
+		err = errors.New("BTree method invoked on nil receiver")
+		return
+	}
+
+	t.serial++
+	return t.root.deleteAny(t.store)
+}
+
+func elem(v interface{}) string {
+	switch x := v.(type) {
+	default:
+		panic("internal error")
+	case nil:
+		return "nil"
+	case bool:
+		if x {
+			return "true"
+		}
+
+		return "false"
+	case int64:
+		return fmt.Sprint(x)
+	case uint64:
+		return fmt.Sprint(x)
+	case float64:
+		s := fmt.Sprintf("%g", x)
+		if !strings.Contains(s, ".") {
+			s += "."
+		}
+		return s
+	case complex128:
+		s := fmt.Sprint(x)
+		return s[1 : len(s)-1]
+	case []byte:
+		return fmt.Sprintf("[]byte{% 02x}", x)
+	case string:
+		return fmt.Sprintf("%q", x)
+	}
+}
+
+// Dump outputs a human readable dump of t to w. It is usable iff t keys and
+// values are encoded scalars (see EncodeScalars). Intended use is only for
+// examples or debugging. Some type information is lost in the rendering, for
+// example a float value '17.' and an integer value '17' may both output as
+// '17'.
+func (t *BTree) Dump(w io.Writer) (err error) {
+	enum, err := t.seekFirst()
+	if err != nil {
+		return
+	}
+
+	for {
+		bkey, bval, err := enum.current()
+		if err != nil {
+			return err
+		}
+
+		key, err := DecodeScalars(bkey)
+		if err != nil {
+			return err
+		}
+
+		val, err := DecodeScalars(bval)
+		if err != nil {
+			return err
+		}
+
+		kk := []string{}
+		if key == nil {
+			kk = []string{"null"}
+		}
+		for _, v := range key {
+			kk = append(kk, elem(v))
+		}
+		vv := []string{}
+		if val == nil {
+			vv = []string{"null"}
+		}
+		for _, v := range val {
+			vv = append(vv, elem(v))
+		}
+		skey := strings.Join(kk, ", ")
+		sval := strings.Join(vv, ", ")
+		if len(vv) > 1 {
+			sval = fmt.Sprintf("[]interface{%s}", sval)
+		}
+		if _, err = fmt.Fprintf(w, "%s → %s\n", skey, sval); err != nil {
+			return err
+		}
+
+		err = enum.next()
+		if err != nil {
+			if fileutil.IsEOF(err) {
+				err = nil
+				break
+			}
+
+			return err
+		}
+	}
+	return
+}
+
+// Extract is a combination of Get and Delete. If the key exists in the tree,
+// it is returned (like Get) and also deleted from a tree in a more efficient
+// way which doesn't walk it twice.  The returned slice may be a sub-slice of
+// buf if buf was large enough to hold the entire content.  Otherwise, a newly
+// allocated slice will be returned.  It is valid to pass a nil buf.
+func (t *BTree) Extract(buf, key []byte) (value []byte, err error) {
+	if t == nil {
+		err = errors.New("BTree method invoked on nil receiver")
+		return
+	}
+
+	t.serial++
+	return t.root.extract(t.store, buf, t.collate, key)
+}
+
+// First returns the first KV pair of the tree, if it exists. Otherwise key == nil
+// and value == nil.
+func (t *BTree) First() (key, value []byte, err error) {
+	if t == nil {
+		err = errors.New("BTree method invoked on nil receiver")
+		return
+	}
+
+	var p btreeDataPage
+	if _, p, err = t.root.first(t.store); err != nil || p == nil {
+		return
+	}
+
+	if key, err = p.key(t.store, 0); err != nil {
+		return
+	}
+
+	value, err = p.value(t.store, 0)
+	return
+}
+
+// Get returns the value associated with key, or nil if no such value exists.
+// The returned slice may be a sub-slice of buf if buf was large enough to hold
+// the entire content.  Otherwise, a newly allocated slice will be returned.
+// It is valid to pass a nil buf.
+//
+// Get is safe for concurrent access by multiple goroutines iff no other
+// goroutine mutates the tree.
+func (t *BTree) Get(buf, key []byte) (value []byte, err error) {
+	if t == nil {
+		err = errors.New("BTree method invoked on nil receiver")
+		return
+	}
+
+	pbuffer := buffer.Get(maxBuf)
+	defer buffer.Put(pbuffer)
+	buffer := *pbuffer
+	if buffer, err = t.root.get(t.store, buffer, t.collate, key); buffer == nil || err != nil {
+		return
+	}
+
+	if len(buffer) != 0 {
+		// The buffer cache returns nil for empty buffers, bypass it
+		value = need(len(buffer), buf)
+	} else {
+		value = []byte{}
+	}
+	copy(value, buffer)
+	return
+}
+
+// Handle reports t's handle.
+func (t *BTree) Handle() int64 {
+	return int64(t.root)
+}
+
+// Last returns the last KV pair of the tree, if it exists. Otherwise key == nil
+// and value == nil.
+func (t *BTree) Last() (key, value []byte, err error) {
+	if t == nil {
+		err = errors.New("BTree method invoked on nil receiver")
+		return
+	}
+
+	var p btreeDataPage
+	if _, p, err = t.root.last(t.store); err != nil || p == nil {
+		return
+	}
+
+	index := p.len() - 1
+	if key, err = p.key(t.store, index); err != nil {
+		return
+	}
+
+	value, err = p.value(t.store, index)
+	return
+}
+
+// Put combines Get and Set in a more efficient way where the tree is walked
+// only once.  The upd(ater) receives the current (key, old-value), if that
+// exists or (key, nil) otherwise.  It can then return a (new-value, true, nil)
+// to create or overwrite the existing value in the KV pair, or (whatever,
+// false, nil) if it decides not to create or not to update the value of the KV
+// pair.
+//
+// 	tree.Set(k, v)
+//
+// conceptually equals
+//
+// 	tree.Put(nil, k, func(key, old []byte) ([]byte, bool, error) { return v, true, nil })
+//
+// modulo the differing return values.
+//
+// The returned slice may be a sub-slice of buf if buf was large enough to hold
+// the entire content.  Otherwise, a newly allocated slice will be returned.
+// It is valid to pass a nil buf.
+func (t *BTree) Put(buf, key []byte, upd func(key, old []byte) (new []byte, write bool, err error)) (old []byte, written bool, err error) {
+	if t == nil {
+		err = errors.New("BTree method invoked on nil receiver")
+		return
+	}
+
+	t.serial++
+	return t.root.put2(buf, t.store, t.collate, key, upd)
+}
+
+// Seek returns an Enumerator with "position" or an error, if any. Normally the
+// position is on a KV pair such that key >= KV.key. Then hit is key == KV.key.
+// The position is possibly "after" the last KV pair, but that is not an error.
+//
+// Seek is safe for concurrent access by multiple goroutines iff no other
+// goroutine mutates the tree.
+func (t *BTree) Seek(key []byte) (enum *BTreeEnumerator, hit bool, err error) {
+	enum0, hit, err := t.seek(key)
+	if err != nil {
+		return
+	}
+
+	enum = &BTreeEnumerator{
+		enum:     enum0,
+		firstHit: hit,
+		key:      append([]byte(nil), key...),
+	}
+	return
+}
+
+func (t *BTree) seek(key []byte) (enum *bTreeEnumerator, hit bool, err error) {
+	if t == nil {
+		err = errors.New("BTree method invoked on nil receiver")
+		return
+	}
+
+	r := &bTreeEnumerator{t: t, collate: t.collate, serial: t.serial}
+	if r.p, r.index, hit, err = t.root.seek(t.store, r.collate, key); err != nil {
+		return
+	}
+
+	enum = r
+	return
+}
+
+// IndexSeek returns an Enumerator with "position" or an error, if any. Normally
+// the position is on a KV pair such that key >= KV.key. Then hit is key ==
+// KV.key.  The position is possibly "after" the last KV pair, but that is not
+// an error.  The collate function originally passed to CreateBTree is used for
+// enumerating the tree but a custom collate function c is used for IndexSeek.
+//
+// IndexSeek is safe for concurrent access by multiple goroutines iff no other
+// goroutine mutates the tree.
+func (t *BTree) IndexSeek(key []byte, c func(a, b []byte) int) (enum *BTreeEnumerator, hit bool, err error) { //TODO +test
+	enum0, hit, err := t.indexSeek(key, c)
+	if err != nil {
+		return
+	}
+
+	enum = &BTreeEnumerator{
+		enum:     enum0,
+		firstHit: hit,
+		key:      append([]byte(nil), key...),
+	}
+	return
+}
+
+func (t *BTree) indexSeek(key []byte, c func(a, b []byte) int) (enum *bTreeEnumerator, hit bool, err error) {
+	if t == nil {
+		err = errors.New("BTree method invoked on nil receiver")
+		return
+	}
+
+	r := &bTreeEnumerator{t: t, collate: t.collate, serial: t.serial}
+	if r.p, r.index, hit, err = t.root.seek(t.store, c, key); err != nil {
+		return
+	}
+
+	enum = r
+	return
+}
+
+// SeekFirst returns an enumerator positioned on the first KV pair in the tree,
+// if any. For an empty tree, err == io.EOF is returned.
+//
+// SeekFirst is safe for concurrent access by multiple goroutines iff no other
+// goroutine mutates the tree.
+func (t *BTree) SeekFirst() (enum *BTreeEnumerator, err error) {
+	enum0, err := t.seekFirst()
+	if err != nil {
+		return
+	}
+
+	var key []byte
+	if key, _, err = enum0.current(); err != nil {
+		return
+	}
+
+	enum = &BTreeEnumerator{
+		enum:     enum0,
+		firstHit: true,
+		key:      append([]byte(nil), key...),
+	}
+	return
+}
+
+func (t *BTree) seekFirst() (enum *bTreeEnumerator, err error) {
+	if t == nil {
+		err = errors.New("BTree method invoked on nil receiver")
+		return
+	}
+
+	var p btreeDataPage
+	if _, p, err = t.root.first(t.store); err == nil && p == nil {
+		err = io.EOF
+	}
+	if err != nil {
+		return
+	}
+
+	return &bTreeEnumerator{t: t, collate: t.collate, p: p, index: 0, serial: t.serial}, nil
+}
+
+// SeekLast returns an enumerator positioned on the last KV pair in the tree,
+// if any. For an empty tree, err == io.EOF is returned.
+//
+// SeekLast is safe for concurrent access by multiple goroutines iff no other
+// goroutine mutates the tree.
+func (t *BTree) SeekLast() (enum *BTreeEnumerator, err error) {
+	enum0, err := t.seekLast()
+	if err != nil {
+		return
+	}
+
+	var key []byte
+	if key, _, err = enum0.current(); err != nil {
+		return
+	}
+
+	enum = &BTreeEnumerator{
+		enum:     enum0,
+		firstHit: true,
+		key:      append([]byte(nil), key...),
+	}
+	return
+}
+
+func (t *BTree) seekLast() (enum *bTreeEnumerator, err error) {
+	if t == nil {
+		err = errors.New("BTree method invoked on nil receiver")
+		return
+	}
+
+	var p btreeDataPage
+	if _, p, err = t.root.last(t.store); err == nil && p == nil {
+		err = io.EOF
+	}
+	if err != nil {
+		return
+	}
+
+	return &bTreeEnumerator{t: t, collate: t.collate, p: p, index: p.len() - 1, serial: t.serial}, nil
+}
+
+// Set sets the value associated with key. Any previous value, if existed, is
+// overwritten by the new one.
+func (t *BTree) Set(key, value []byte) (err error) {
+	if t == nil {
+		err = errors.New("BTree method invoked on nil receiver")
+		return
+	}
+
+	t.serial++
+	pdst := buffer.Get(maxBuf)
+	dst := *pdst
+	_, err = t.root.put(dst, t.store, t.collate, key, value, true)
+	buffer.Put(pdst)
+	return
+}
+
+// bTreeEnumerator is a closure of a BTree and a position. It is returned from
+// BTree.seek.
+//
+// NOTE: bTreeEnumerator cannot be used after its BTree was mutated after the
+// bTreeEnumerator was acquired from any of the seek, seekFirst, seekLast
+// methods.
+type bTreeEnumerator struct {
+	t       *BTree
+	collate func(a, b []byte) int
+	p       btreeDataPage
+	index   int
+	serial  uint64
+}
+
+// Current returns the KV pair the enumerator is currently positioned on. If
+// the position is before the first KV pair in the tree or after the last KV
+// pair in the tree then err == io.EOF is returned.
+//
+// If the enumerator has been invalidated by updating the tree, ErrINVAL is
+// returned.
+func (e *bTreeEnumerator) current() (key, value []byte, err error) {
+	if e == nil {
+		err = errors.New("bTreeEnumerator method invoked on nil receiver")
+		return
+	}
+
+	if e.serial != e.t.serial {
+		err = &ErrINVAL{Src: "bTreeEnumerator invalidated by updating the tree"}
+		return
+	}
+
+	if e.p == nil || e.index == e.p.len() {
+		return nil, nil, io.EOF
+	}
+
+	if key, err = e.p.key(e.t.store, e.index); err != nil {
+		return
+	}
+
+	value, err = e.p.value(e.t.store, e.index)
+	return
+}
+
+// Next attempts to position the enumerator onto the next KV pair wrt the
+// current position. If there is no "next" KV pair, io.EOF is returned.
+//
+// If the enumerator has been invalidated by updating the tree, ErrINVAL is
+// returned.
+func (e *bTreeEnumerator) next() (err error) {
+	if e == nil {
+		err = errors.New("bTreeEnumerator method invoked on nil receiver")
+		return
+	}
+
+	if e.serial != e.t.serial {
+		err = &ErrINVAL{Src: "bTreeEnumerator invalidated by updating the tree"}
+		return
+	}
+
+	if e.p == nil {
+		return io.EOF
+	}
+
+	switch {
+	case e.index < e.p.len()-1:
+		e.index++
+	default:
+		ph := e.p.next()
+		if ph == 0 {
+			err = io.EOF
+			break
+		}
+
+		if e.p, err = e.t.store.Get(e.p, ph); err != nil {
+			e.p = nil
+			return
+		}
+		e.index = 0
+	}
+	return
+}
+
+// Prev attempts to position the enumerator onto the previous KV pair wrt the
+// current position. If there is no "previous" KV pair, io.EOF is returned.
+//
+// If the enumerator has been invalidated by updating the tree, ErrINVAL is
+// returned.
+func (e *bTreeEnumerator) prev() (err error) {
+	if e == nil {
+		err = errors.New("bTreeEnumerator method invoked on nil receiver")
+		return
+	}
+
+	if e.serial != e.t.serial {
+		err = &ErrINVAL{Src: "bTreeEnumerator invalidated by updating the tree"}
+		return
+	}
+
+	if e.p == nil {
+		return io.EOF
+	}
+
+	switch {
+	case e.index > 0:
+		e.index--
+	default:
+		ph := e.p.prev()
+		if ph == 0 {
+			err = io.EOF
+			break
+		}
+
+		if e.p, err = e.t.store.Get(e.p, ph); err != nil {
+			e.p = nil
+			return
+		}
+		e.index = e.p.len() - 1
+	}
+	return
+}
+
+// BTreeEnumerator captures the state of enumerating a tree. It is returned
+// from the Seek* methods.  The enumerator is aware of any mutations made to
+// the tree in the process of enumerating it and automatically resumes the
+// enumeration.
+type BTreeEnumerator struct {
+	enum     *bTreeEnumerator
+	err      error
+	key      []byte
+	firstHit bool
+}
+
+// Next returns the currently enumerated KV pair, if it exists and moves to the
+// next KV in the key collation order. If there is no KV pair to return, err ==
+// io.EOF is returned.
+//
+// Next is safe for concurrent access by multiple goroutines iff no other
+// goroutine mutates the tree.
+func (e *BTreeEnumerator) Next() (key, value []byte, err error) {
+	if err = e.err; err != nil {
+		return
+	}
+
+	canRetry := true
+retry:
+	if e.enum.p == nil {
+		e.err = io.EOF
+		return nil, nil, e.err
+	}
+
+	if e.enum.index == e.enum.p.len() && e.enum.serial == e.enum.t.serial {
+		if err := e.enum.next(); err != nil {
+			e.err = err
+			return nil, nil, e.err
+		}
+	}
+
+	if key, value, err = e.enum.current(); err != nil {
+		if _, ok := err.(*ErrINVAL); !ok || !canRetry {
+			e.err = err
+			return
+		}
+
+		canRetry = false
+		var hit bool
+		if e.enum, hit, err = e.enum.t.seek(e.key); err != nil {
+			e.err = err
+			return
+		}
+
+		if !e.firstHit && hit {
+			err = e.enum.next()
+			if err != nil {
+				e.err = err
+				return
+			}
+		}
+
+		goto retry
+	}
+
+	e.firstHit = false
+	e.key = append([]byte(nil), key...)
+	e.err = e.enum.next()
+	return
+}
+
+// Prev returns the currently enumerated KV pair, if it exists and moves to the
+// previous KV in the key collation order. If there is no KV pair to return,
+// err == io.EOF is returned.
+//
+// Prev is safe for concurrent access by multiple goroutines iff no other
+// goroutine mutates the tree.
+func (e *BTreeEnumerator) Prev() (key, value []byte, err error) {
+	if err = e.err; err != nil {
+		return
+	}
+
+	canRetry := true
+retry:
+	if key, value, err = e.enum.current(); err != nil {
+		if _, ok := err.(*ErrINVAL); !ok || !canRetry {
+			e.err = err
+			return
+		}
+
+		canRetry = false
+		var hit bool
+		if e.enum, hit, err = e.enum.t.seek(e.key); err != nil {
+			e.err = err
+			return
+		}
+
+		if !e.firstHit && hit {
+			err = e.enum.prev()
+			if err != nil {
+				e.err = err
+				return
+			}
+		}
+
+		goto retry
+	}
+
+	e.firstHit = false
+	e.key = append([]byte(nil), key...)
+	e.err = e.enum.prev()
+	return
+}
+
+// CreateBTree creates a new BTree in store. It returns the tree, its (freshly
+// assigned) handle (for OpenBTree or RemoveBTree) or an error, if any.
+func CreateBTree(store *Allocator, collate func(a, b []byte) int) (bt *BTree, handle int64, err error) {
+	r := &BTree{store: store, collate: collate}
+	if r.root, err = newBTree(store); err != nil {
+		return
+	}
+
+	return r, int64(r.root), nil
+}
+
+// OpenBTree opens a store's BTree using handle. It returns the tree or an
+// error, if any. The same tree may be opened more than once, but operations on
+// the separate instances should not ever overlap or void the other instances.
+// However, the intended API usage is to open the same tree handle only once
+// (handled by some upper layer "dispatcher").
+func OpenBTree(store *Allocator, collate func(a, b []byte) int, handle int64) (bt *BTree, err error) {
+	r := &BTree{store: store, root: btree(handle), collate: collate}
+	pb := buffer.Get(7)
+	defer buffer.Put(pb)
+	b := *pb
+	if b, err = store.Get(b, handle); err != nil {
+		return
+	}
+
+	if len(b) != 7 {
+		return nil, &ErrILSEQ{Off: h2off(handle), More: "btree.go:671"}
+	}
+
+	return r, nil
+}
+
+// RemoveBTree removes the tree represented by handle from store. Empty trees
+// are cheap, each using only a few bytes of the store. If there's a chance that
+// a tree will eventually get reused (non empty again), it's recommended not to
+// remove it. One advantage of such an approach is a stable handle for such a
+// tree.
+func RemoveBTree(store *Allocator, handle int64) (err error) {
+	tree, err := OpenBTree(store, nil, handle)
+	if err != nil {
+		return
+	}
+
+	if err = tree.Clear(); err != nil {
+		return
+	}
+
+	return store.Free(handle)
+}
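
A minimal sketch of the public BTree API above, using the memory-only constructor (a hypothetical example, not part of the vendored btree.go):

	package main

	import (
		"bytes"
		"fmt"
		"io"

		"github.com/cznic/lldb"
	)

	func main() {
		t := lldb.NewBTree(bytes.Compare) // memory-only tree with byte-wise key collation
		if err := t.Set([]byte("k"), []byte("v")); err != nil {
			panic(err)
		}

		val, err := t.Get(nil, []byte("k"))
		if err != nil {
			panic(err)
		}
		fmt.Println(string(val)) // v

		enum, err := t.SeekFirst()
		if err != nil {
			panic(err)
		}
		for {
			k, v, err := enum.Next()
			if err == io.EOF {
				break
			}
			if err != nil {
				panic(err)
			}
			fmt.Printf("%s → %s\n", k, v)
		}
	}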
+
+type btreeStore interface {
+	Alloc(b []byte) (handle int64, err error)
+	Free(handle int64) (err error)
+	Get(dst []byte, handle int64) (b []byte, err error)
+	Realloc(handle int64, b []byte) (err error)
+}
+
+// Read only zero bytes
+var zeros [2 * kKV]byte
+
+func init() {
+	if kData < 1 || kData > 512 {
+		panic(fmt.Errorf("kData %d: out of limits", kData))
+	}
+
+	if kIndex < 2 || kIndex > 2048 {
+		panic(fmt.Errorf("kIndex %d: out of limits", kIndex))
+	}
+
+	if kKV < 8 || kKV > 23 {
+		panic(fmt.Errorf("kKV %d: out of limits", kKV))
+	}
+
+	if n := len(zeros); n < 15 {
+		panic(fmt.Errorf("not enough zeros: %d", n))
+	}
+}
+
+type memBTreeStore struct {
+	h int64
+	m map[int64][]byte
+}
+
+func newMemBTreeStore() *memBTreeStore {
+	return &memBTreeStore{h: 0, m: map[int64][]byte{}}
+}
+
+func (s *memBTreeStore) String() string {
+	var a sortutil.Int64Slice
+	for k := range s.m {
+		a = append(a, k)
+	}
+	sort.Sort(a)
+	var sa []string
+	for _, k := range a {
+		sa = append(sa, fmt.Sprintf("%#x:|% x|", k, s.m[k]))
+	}
+	return strings.Join(sa, "\n")
+}
+
+func (s *memBTreeStore) Alloc(b []byte) (handle int64, err error) {
+	s.h++
+	handle = s.h
+	s.m[handle] = bpack(b)
+	return
+}
+
+func (s *memBTreeStore) Free(handle int64) (err error) {
+	if _, ok := s.m[handle]; !ok {
+		return &ErrILSEQ{Type: ErrOther, Off: h2off(handle), More: "btree.go:754"}
+	}
+
+	delete(s.m, handle)
+	return
+}
+
+func (s *memBTreeStore) Get(dst []byte, handle int64) (b []byte, err error) {
+	r, ok := s.m[handle]
+	if !ok {
+		return nil, &ErrILSEQ{Type: ErrOther, Off: h2off(handle), More: "btree.go:764"}
+	}
+
+	b = need(len(r), dst)
+	copy(b, r)
+	return
+}
+
+func (s *memBTreeStore) Realloc(handle int64, b []byte) (err error) {
+	if _, ok := s.m[handle]; !ok {
+		return &ErrILSEQ{Type: ErrOther, Off: h2off(handle), More: "btree.go:774"}
+	}
+
+	s.m[handle] = bpack(b)
+	return
+}
+
+/*
+
+0...0 (1 bytes):
+Flag
+
+	  0
+	+---+
+	| 0 |
+	+---+
+
+0 indicates an index page
+
+1...count*14-1
+"array" of items, 14 bytes each. Count of items in kIndex-1..2*kIndex+2
+
+	Count = (len(raw) - 8) / 14
+
+	  0..6     7..13
+	+-------+----------+
+	| Child | DataPage |
+	+-------+----------+
+
+	Child    == handle of a child index page
+	DataPage == handle of a data page
+
+Offsets into the raw []byte:
+Child[X]    == 1+14*X
+DataPage[X] == 8+14*X
+
+*/
+type btreeIndexPage []byte
+
+func newBTreeIndexPage(leftmostChild int64) (p btreeIndexPage) {
+	p = (*buffer.Get(1 + (kIndex+1)*2*7))[:8]
+	p[0] = tagBTreeIndexPage
+	h2b(p[1:], leftmostChild)
+	return
+}
+
+func (p btreeIndexPage) len() int {
+	return (len(p) - 8) / 14
+}
+
+func (p btreeIndexPage) child(index int) int64 {
+	return b2h(p[1+14*index:])
+}
+
+func (p btreeIndexPage) setChild(index int, dp int64) {
+	h2b(p[1+14*index:], dp)
+}
+
+func (p btreeIndexPage) dataPage(index int) int64 {
+	return b2h(p[8+14*index:])
+}
+
+func (p btreeIndexPage) setDataPage(index int, dp int64) {
+	h2b(p[8+14*index:], dp)
+}
+
+func (q btreeIndexPage) insert(index int) btreeIndexPage {
+	switch len0 := q.len(); {
+	case index < len0:
+		has := len(q)
+		need := has + 14
+		switch {
+		case cap(q) >= need:
+			q = q[:need]
+		default:
+			q = append(q, zeros[:14]...)
+		}
+		copy(q[8+14*(index+1):8+14*(index+1)+2*(len0-index)*7], q[8+14*index:])
+	case index == len0:
+		has := len(q)
+		need := has + 14
+		switch {
+		case cap(q) >= need:
+			q = q[:need]
+		default:
+			q = append(q, zeros[:14]...)
+		}
+	}
+	return q
+}
+
+func (p btreeIndexPage) insert3(index int, dataPage, child int64) btreeIndexPage {
+	p = p.insert(index)
+	p.setDataPage(index, dataPage)
+	p.setChild(index+1, child)
+	return p
+}
+
+func (p btreeIndexPage) cmp(a btreeStore, c func(a, b []byte) int, keyA []byte, keyBIndex int) (int, error) {
+	pb := buffer.Get(maxBuf)
+	defer buffer.Put(pb)
+	b := *pb
+	dp, err := a.Get(b, p.dataPage(keyBIndex))
+	if err != nil {
+		return 0, err
+	}
+
+	return btreeDataPage(dp).cmp(a, c, keyA, 0)
+}
+
+func (q btreeIndexPage) setLen(n int) btreeIndexPage {
+	q = q[:cap(q)]
+	need := 8 + 14*n
+	if need < len(q) {
+		return q[:need]
+	}
+	return append(q, make([]byte, need-len(q))...)
+}
+
+func (p btreeIndexPage) split(a btreeStore, root btree, ph *int64, parent int64, parentIndex int, index *int) (btreeIndexPage, error) {
+	right := newBTreeIndexPage(0)
+	right = right.setLen(kIndex)
+	copy(right[1:1+(2*kIndex+1)*7], p[1+14*(kIndex+1):])
+	p = p.setLen(kIndex)
+	if err := a.Realloc(*ph, p); err != nil {
+		return nil, err
+	}
+
+	rh, err := a.Alloc(right)
+	if err != nil {
+		return nil, err
+	}
+
+	if parentIndex >= 0 {
+		ppp := buffer.Get(maxBuf)
+		defer buffer.Put(ppp)
+		pp := btreeIndexPage(*ppp)
+		if pp, err = a.Get(pp, parent); err != nil {
+			return nil, err
+		}
+		pp = pp.insert3(parentIndex, p.dataPage(kIndex), rh)
+		if err = a.Realloc(parent, pp); err != nil {
+			return nil, err
+		}
+
+	} else {
+		nr := newBTreeIndexPage(*ph)
+		nr = nr.insert3(0, p.dataPage(kIndex), rh)
+		nrh, err := a.Alloc(nr)
+		if err != nil {
+			return nil, err
+		}
+
+		if err = a.Realloc(int64(root), h2b(make([]byte, 7), nrh)); err != nil {
+			return nil, err
+		}
+	}
+	if *index > kIndex {
+		p = right
+		*ph = rh
+		*index -= kIndex + 1
+	}
+	return p, nil
+}
+
+// p is dirty on return
+func (p btreeIndexPage) extract(index int) btreeIndexPage {
+	n := p.len() - 1
+	if index < n {
+		sz := (n-index)*14 + 7
+		copy(p[1+14*index:1+14*index+sz], p[1+14*(index+1):])
+	}
+	return p.setLen(n)
+}
+
+// must persist all changes made
+func (p btreeIndexPage) underflow(a btreeStore, root, iroot, parent int64, ph *int64, parentIndex int, index *int) (btreeIndexPage, error) {
+	lh, rh, err := checkSiblings(a, parent, parentIndex)
+	if err != nil {
+		return nil, err
+	}
+
+	pleft := buffer.Get(maxBuf)
+	defer buffer.Put(pleft)
+	left := btreeIndexPage(*pleft)
+
+	if lh != 0 {
+		if left, err = a.Get(left, lh); err != nil {
+			return nil, err
+		}
+
+		if lc := btreeIndexPage(left).len(); lc > kIndex {
+			ppp := buffer.Get(maxBuf)
+			defer buffer.Put(ppp)
+			pp := *ppp
+			if pp, err = a.Get(pp, parent); err != nil {
+				return nil, err
+			}
+
+			pc := p.len()
+			p = p.setLen(pc + 1)
+			di, si, sz := 1+1*14, 1+0*14, (2*pc+1)*7
+			copy(p[di:di+sz], p[si:])
+			p.setChild(0, btreeIndexPage(left).child(lc))
+			p.setDataPage(0, btreeIndexPage(pp).dataPage(parentIndex-1))
+			*index++
+			btreeIndexPage(pp).setDataPage(parentIndex-1, btreeIndexPage(left).dataPage(lc-1))
+			left = left.setLen(lc - 1)
+			if err = a.Realloc(parent, pp); err != nil {
+				return nil, err
+			}
+
+			if err = a.Realloc(*ph, p); err != nil {
+				return nil, err
+			}
+
+			return p, a.Realloc(lh, left)
+		}
+	}
+
+	if rh != 0 {
+		pright := buffer.Get(maxBuf)
+		defer buffer.Put(pright)
+		right := *pright
+		if right, err = a.Get(right, rh); err != nil {
+			return nil, err
+		}
+
+		if rc := btreeIndexPage(right).len(); rc > kIndex {
+			ppp := buffer.Get(maxBuf)
+			defer buffer.Put(ppp)
+			pp := *ppp
+			if pp, err = a.Get(pp, parent); err != nil {
+				return nil, err
+			}
+
+			pc := p.len()
+			p = p.setLen(pc + 1)
+			p.setDataPage(pc, btreeIndexPage(pp).dataPage(parentIndex))
+			pc++
+			p.setChild(pc, btreeIndexPage(right).child(0))
+			btreeIndexPage(pp).setDataPage(parentIndex, btreeIndexPage(right).dataPage(0))
+			di, si, sz := 1+0*14, 1+1*14, (2*rc+1)*7
+			copy(right[di:di+sz], right[si:])
+			right = btreeIndexPage(right).setLen(rc - 1)
+			if err = a.Realloc(parent, pp); err != nil {
+				return nil, err
+			}
+
+			if err = a.Realloc(*ph, p); err != nil {
+				return nil, err
+			}
+
+			return p, a.Realloc(rh, right)
+		}
+	}
+
+	if lh != 0 {
+		*index += left.len() + 1
+		if left, err = left.concat(a, root, iroot, parent, lh, *ph, parentIndex-1); err != nil {
+			return p, err
+		}
+
+		p, *ph = left, lh
+		return p, nil
+	}
+
+	return p.concat(a, root, iroot, parent, *ph, rh, parentIndex)
+}
+
+// must persist all changes made
+func (p btreeIndexPage) concat(a btreeStore, root, iroot, parent, ph, rh int64, parentIndex int) (btreeIndexPage, error) {
+	ppp := buffer.Get(maxBuf)
+	defer buffer.Put(ppp)
+	pp := *ppp
+	pp, err := a.Get(pp, parent)
+	if err != nil {
+		return nil, err
+	}
+
+	pright := buffer.Get(maxBuf)
+	defer buffer.Put(pright)
+	right := *pright
+	if right, err = a.Get(right, rh); err != nil {
+		return nil, err
+	}
+
+	pc := p.len()
+	rc := btreeIndexPage(right).len()
+	p = p.setLen(pc + rc + 1)
+	p.setDataPage(pc, btreeIndexPage(pp).dataPage(parentIndex))
+	di, si, sz := 1+14*(pc+1), 1+0*14, (2*rc+1)*7
+	copy(p[di:di+sz], right[si:])
+	if err := a.Realloc(ph, p); err != nil {
+		return nil, err
+	}
+
+	if err := a.Free(rh); err != nil {
+		return nil, err
+	}
+
+	if pc := btreeIndexPage(pp).len(); pc > 1 {
+		if parentIndex < pc-1 {
+			di, si, sz := 8+parentIndex*14, 8+(parentIndex+1)*14, 2*(pc-1-parentIndex)*7
+			copy(pp[di:si+sz], pp[si:])
+		}
+		pp = btreeIndexPage(pp).setLen(pc - 1)
+		return p, a.Realloc(parent, pp)
+	}
+
+	if err := a.Free(iroot); err != nil {
+		return nil, err
+	}
+
+	pb7 := buffer.Get(7)
+	defer buffer.Put(pb7)
+	b7 := *pb7
+	return p, a.Realloc(root, h2b(b7[:7], ph))
+}
+
+/*
+
+0...0 (1 byte):
+Flag
+
+	  0
+	+---+
+	| 1 |
+	+---+
+
+1 indicates a data page
+
+1...14 (14 bytes)
+
+	  1..7  8..14
+	+------+------+
+	| Prev | Next |
+	+------+------+
+
+	Prev, Next == Handles of the data pages doubly linked list
+
+	Count = (len(raw) - 15) / (2*kKV)
+
+15...count*2*kKV-1
+"array" of items, 2*kKV bytes each. Count of items in kData-1..2*kData
+
+Item
+	  0..kKV-1   kKV..2*kKV-1
+	+----------+--------------+
+	|   Key    |    Value     |
+	+----------+--------------+
+
+Key/Value encoding
+
+Length 0...kKV-1
+
+	  0    1...N    N+1...kKV-1
+	+---+---------+-------------+
+	| N |  Data   |  Padding    |
+	+---+---------+-------------+
+
+	N       == content length
+	Data    == Key or Value content
+	Padding == MUST be zero bytes
+
+Length >= kKV
+
+	   0     1...kKV-8   kKV-7...kKV-1
+	+------+-----------+--------------+
+	| 0xFF |   Data    |      H       |
+	+------+-----------+--------------+
+
+	Data == Key or Value content, first kKV-8 bytes
+	H    == Handle to THE REST of the content, w/o the first bytes in Data.
+
+Offsets into the raw []byte:
+Key[X]   == 15+2*kKV*X
+Value[X] == 15+kKV+2*kKV*X
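+
+For example, for a hypothetical kKV of 32 (the real constant is defined
+elsewhere in this package), Key[2] == 15 + 2*32*2 == 143 and
+Value[2] == 143 + 32 == 175.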
+*/
+type btreeDataPage []byte
+
+func newBTreeDataPage() (p btreeDataPage) {
+	p = (*buffer.CGet(1 + 2*7 + (kData+1)*2*kKV))[:1+2*7]
+	p[0] = tagBTreeDataPage
+	return
+}
+
+func newBTreeDataPageAlloc(a btreeStore) (p btreeDataPage, h int64, err error) {
+	p = newBTreeDataPage()
+	h, err = a.Alloc(p)
+	return
+}
+
+func (p btreeDataPage) len() int {
+	return (len(p) - 15) / (2 * kKV)
+}
+
+func (q btreeDataPage) setLen(n int) btreeDataPage {
+	q = q[:cap(q)]
+	need := 15 + 2*kKV*n
+	if need < len(q) {
+		return q[:need]
+	}
+	return append(q, make([]byte, need-len(q))...)
+}
+
+func (p btreeDataPage) prev() int64 {
+	return b2h(p[1:])
+}
+
+func (p btreeDataPage) next() int64 {
+	return b2h(p[8:])
+}
+
+func (p btreeDataPage) setPrev(h int64) {
+	h2b(p[1:], h)
+}
+
+func (p btreeDataPage) setNext(h int64) {
+	h2b(p[8:], h)
+}
+
+func (q btreeDataPage) insert(index int) btreeDataPage {
+	switch len0 := q.len(); {
+	case index < len0:
+		has := len(q)
+		need := has + 2*kKV
+		switch {
+		case cap(q) >= need:
+			q = q[:need]
+		default:
+			q = append(q, zeros[:2*kKV]...)
+		}
+		q.copy(q, index+1, index, len0-index)
+		return q
+	case index == len0:
+		has := len(q)
+		need := has + 2*kKV
+		switch {
+		case cap(q) >= need:
+			return q[:need]
+		default:
+			return append(q, zeros[:2*kKV]...)
+		}
+	}
+	panic("internal error")
+}
+
+func (p btreeDataPage) contentField(off int) (b []byte, h int64) {
+	p = p[off:]
+	switch n := int(p[0]); {
+	case n >= kKV: // content has a handle
+		b = append([]byte(nil), p[1:1+kSz]...)
+		h = b2h(p[kH:])
+	default: // content is embedded
+		b, h = append([]byte(nil), p[1:1+n]...), 0
+	}
+	return
+}
+
+func (p btreeDataPage) content(a btreeStore, off int) (b []byte, err error) {
+	b, h := p.contentField(off)
+	if h == 0 {
+		return
+	}
+
+	// content has a handle
+	b2, err := a.Get(nil, h) //TODO buffers: Later, not a public API
+	if err != nil {
+		return nil, err
+	}
+
+	return append(b, b2...), nil
+}
+
+func (p btreeDataPage) setContent(a btreeStore, off int, b []byte) (err error) {
+	p = p[off:]
+	switch {
+	case p[0] >= kKV: // existing content has a handle
+		switch n := len(b); {
+		case n < kKV:
+			p[0] = byte(n)
+			if err = a.Free(b2h(p[kH:])); err != nil {
+				return
+			}
+			copy(p[1:], b)
+		default:
+			// reuse handle
+			copy(p[1:1+kSz], b)
+			return a.Realloc(b2h(p[kH:]), b[kSz:])
+		}
+	default: // existing content is embedded
+		switch n := len(b); {
+		case n < kKV:
+			p[0] = byte(n)
+			copy(p[1:], b)
+		default:
+			p[0] = 0xff
+			copy(p[1:1+kSz], b)
+			h, err := a.Alloc(b[kSz:])
+			if err != nil {
+				return err
+			}
+
+			h2b(p[kH:], h)
+		}
+	}
+	return
+}
+
+func (p btreeDataPage) keyField(index int) (b []byte, h int64) {
+	return p.contentField(15 + 2*kKV*index)
+}
+
+func (p btreeDataPage) key(a btreeStore, index int) (b []byte, err error) {
+	return p.content(a, 15+2*kKV*index)
+}
+
+func (p btreeDataPage) valueField(index int) (b []byte, h int64) {
+	return p.contentField(15 + kKV + 2*kKV*index)
+}
+
+func (p btreeDataPage) value(a btreeStore, index int) (b []byte, err error) {
+	value, err := p.content(a, 15+kKV+2*kKV*index)
+	if err == nil && value == nil {
+		// We have a valid page, no fetch error, the key is valid so return
+		// non-nil data
+		return []byte{}, nil
+	}
+	return value, err
+}
+
+func (p btreeDataPage) valueCopy(a btreeStore, index int) (b []byte, err error) {
+	if b, err = p.content(a, 15+kKV+2*kKV*index); err != nil {
+		return
+	}
+
+	return append([]byte(nil), b...), nil
+}
+
+func (p btreeDataPage) setKey(a btreeStore, index int, key []byte) (err error) {
+	return p.setContent(a, 15+2*kKV*index, key)
+}
+
+func (p btreeDataPage) setValue(a btreeStore, index int, value []byte) (err error) {
+	return p.setContent(a, 15+kKV+2*kKV*index, value)
+}
+
+func (p btreeDataPage) cmp(a btreeStore, c func(a, b []byte) int, keyA []byte, keyBIndex int) (y int, err error) {
+	var keyB []byte
+	if keyB, err = p.content(a, 15+2*kKV*keyBIndex); err != nil {
+		return
+	}
+
+	return c(keyA, keyB), nil
+}
+
+func (p btreeDataPage) copy(src btreeDataPage, di, si, n int) {
+	do, so := 15+2*kKV*di, 15+2*kKV*si
+	copy(p[do:do+2*kKV*n], src[so:])
+}
+
+// {p,left} dirty on exit
+func (p btreeDataPage) moveLeft(left btreeDataPage, n int) (btreeDataPage, btreeDataPage) {
+	nl, np := left.len(), p.len()
+	left = left.setLen(nl + n)
+	left.copy(p, nl, 0, n)
+	p.copy(p, 0, n, np-n)
+	return p.setLen(np - n), left
+}
+
+func (p btreeDataPage) moveRight(right btreeDataPage, n int) (btreeDataPage, btreeDataPage) {
+	nr, np := right.len(), p.len()
+	right = right.setLen(nr + n)
+	right.copy(right, n, 0, nr)
+	right.copy(p, 0, np-n, n)
+	return p.setLen(np - n), right
+}
+
+func (p btreeDataPage) insertItem(a btreeStore, index int, key, value []byte) (btreeDataPage, error) {
+	p = p.insert(index)
+	di, sz := 15+2*kKV*index, 2*kKV
+	copy(p[di:di+sz], zeros[:sz])
+	if err := p.setKey(a, index, key); err != nil {
+		return nil, err
+	}
+	return p, p.setValue(a, index, value)
+}
+
+func (p btreeDataPage) split(a btreeStore, root, ph, parent int64, parentIndex, index int, key, value []byte) (btreeDataPage, error) {
+	right, rh, err := newBTreeDataPageAlloc(a)
+	if err != nil {
+		return nil, err
+	}
+
+	if next := p.next(); next != 0 {
+		right.setNext(p.next())
+		nxh := right.next()
+		pnx := buffer.Get(maxBuf)
+		defer buffer.Put(pnx)
+		nx := *pnx
+		if nx, err = a.Get(nx, nxh); err != nil {
+			return nil, err
+		}
+
+		btreeDataPage(nx).setPrev(rh)
+		if err = a.Realloc(nxh, nx); err != nil {
+			return nil, err
+		}
+	}
+
+	p.setNext(rh)
+	right.setPrev(ph)
+	right = right.setLen(kData)
+	right.copy(p, 0, kData, kData)
+	p = p.setLen(kData)
+
+	if parentIndex >= 0 {
+		ppp := buffer.Get(maxBuf)
+		defer buffer.Put(ppp)
+		pp := btreeIndexPage(*ppp)
+		if pp, err = a.Get(pp, parent); err != nil {
+			return nil, err
+		}
+
+		pp = pp.insert3(parentIndex, rh, rh)
+		if err = a.Realloc(parent, pp); err != nil {
+			return nil, err
+		}
+
+	} else {
+		nr := newBTreeIndexPage(ph)
+		nr = nr.insert3(0, rh, rh)
+		nrh, err := a.Alloc(nr)
+		if err != nil {
+			return nil, err
+		}
+
+		if err = a.Realloc(root, h2b(make([]byte, 7), nrh)); err != nil {
+			return nil, err
+		}
+
+	}
+	if index > kData {
+		if right, err = right.insertItem(a, index-kData, key, value); err != nil {
+			return nil, err
+		}
+	} else {
+		if p, err = p.insertItem(a, index, key, value); err != nil {
+			return nil, err
+		}
+	}
+	if err = a.Realloc(ph, p); err != nil {
+		return nil, err
+	}
+
+	return p, a.Realloc(rh, right)
+}
+
+func (p btreeDataPage) overflow(a btreeStore, root, ph, parent int64, parentIndex, index int, key, value []byte) (btreeDataPage, error) {
+	leftH, rightH, err := checkSiblings(a, parent, parentIndex)
+	if err != nil {
+		return nil, err
+	}
+
+	if leftH != 0 {
+		pleft := buffer.Get(maxBuf)
+		defer buffer.Put(pleft)
+		left := btreeDataPage(*pleft)
+		if left, err = a.Get(left, leftH); err != nil {
+			return nil, err
+		}
+
+		if left.len() < 2*kData && index > 0 {
+
+			p, left = p.moveLeft(left, 1)
+			if err = a.Realloc(leftH, left); err != nil {
+				return nil, err
+			}
+
+			if p, err = p.insertItem(a, index-1, key, value); err != nil {
+				return nil, err
+			}
+
+			return p, a.Realloc(ph, p)
+		}
+	}
+
+	if rightH != 0 {
+		pright := buffer.Get(maxBuf)
+		defer buffer.Put(pright)
+		right := btreeDataPage(*pright)
+		if right, err = a.Get(right, rightH); err != nil {
+			return nil, err
+		}
+
+		if right.len() < 2*kData {
+			if index < 2*kData {
+				p, right = p.moveRight(right, 1)
+				if err = a.Realloc(rightH, right); err != nil {
+					return nil, err
+				}
+
+				if p, err = p.insertItem(a, index, key, value); err != nil {
+					return nil, err
+				}
+
+				return p, a.Realloc(ph, p)
+			} else {
+				if right, err = right.insertItem(a, 0, key, value); err != nil {
+					return nil, err
+				}
+
+				return p, a.Realloc(rightH, right)
+			}
+		}
+	}
+	return p.split(a, root, ph, parent, parentIndex, index, key, value)
+}
+
+func (p btreeDataPage) swap(a btreeStore, di int, value []byte, canOverwrite bool) (oldValue []byte, err error) {
+	if oldValue, err = p.value(a, di); err != nil {
+		return
+	}
+
+	if !canOverwrite {
+		return
+	}
+
+	oldValue = append([]byte(nil), oldValue...)
+	err = p.setValue(a, di, value)
+	return
+}
+
+type btreePage []byte
+
+func (p btreePage) isIndex() bool {
+	return p[0] == tagBTreeIndexPage
+}
+
+func (p btreePage) len() int {
+	if p.isIndex() {
+		return btreeIndexPage(p).len()
+	}
+
+	return btreeDataPage(p).len()
+}
+
+func (p btreePage) find(a btreeStore, c func(a, b []byte) int, key []byte) (index int, ok bool, err error) {
+	l := 0
+	h := p.len() - 1
+	isIndex := p.isIndex()
+	if c == nil {
+		c = bytes.Compare
+	}
+	for l <= h {
+		index = (l + h) >> 1
+		var cmp int
+		if isIndex {
+			if cmp, err = btreeIndexPage(p).cmp(a, c, key, index); err != nil {
+				return
+			}
+		} else {
+			if cmp, err = btreeDataPage(p).cmp(a, c, key, index); err != nil {
+				return
+			}
+		}
+		switch ok = cmp == 0; {
+		case cmp > 0:
+			l = index + 1
+		case ok:
+			return
+		default:
+			h = index - 1
+		}
+	}
+	return l, false, nil
+}
+
+// p is dirty after extract!
+func (p btreeDataPage) extract(a btreeStore, index int) (btreeDataPage, []byte, error) {
+	value, err := p.valueCopy(a, index)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	if _, h := p.keyField(index); h != 0 {
+		if err = a.Free(h); err != nil {
+			return nil, nil, err
+		}
+	}
+
+	if _, h := p.valueField(index); h != 0 {
+		if err = a.Free(h); err != nil {
+			return nil, nil, err
+		}
+	}
+
+	n := p.len() - 1
+	if index < n {
+		p.copy(p, index, index+1, n-index)
+	}
+	return p.setLen(n), value, nil
+}
+
+func checkSiblings(a btreeStore, parent int64, parentIndex int) (left, right int64, err error) {
+	if parentIndex >= 0 {
+		pp := buffer.Get(maxBuf)
+		defer buffer.Put(pp)
+		p := btreeIndexPage(*pp)
+		if p, err = a.Get(p, parent); err != nil {
+			return
+		}
+
+		if parentIndex > 0 {
+			left = p.child(parentIndex - 1)
+		}
+		if parentIndex < p.len() {
+			right = p.child(parentIndex + 1)
+		}
+	}
+	return
+}
+
+// underflow must persist all changes made.
+func (p btreeDataPage) underflow(a btreeStore, root, iroot, parent, ph int64, parentIndex int) (err error) {
+	lh, rh, err := checkSiblings(a, parent, parentIndex)
+	if err != nil {
+		return err
+	}
+
+	if lh != 0 {
+		pleft := buffer.Get(maxBuf)
+		defer buffer.Put(pleft)
+		left := *pleft
+		if left, err = a.Get(left, lh); err != nil {
+			return err
+		}
+
+		if btreeDataPage(left).len()+p.len() >= 2*kData {
+			left, p = btreeDataPage(left).moveRight(p, 1)
+			if err = a.Realloc(lh, left); err != nil {
+				return err
+			}
+
+			return a.Realloc(ph, p)
+		}
+	}
+
+	if rh != 0 {
+		pright := buffer.Get(maxBuf)
+		defer buffer.Put(pright)
+		right := *pright
+		if right, err = a.Get(right, rh); err != nil {
+			return err
+		}
+
+		if p.len()+btreeDataPage(right).len() > 2*kData {
+			right, p = btreeDataPage(right).moveLeft(p, 1)
+			if err = a.Realloc(rh, right); err != nil {
+				return err
+			}
+
+			return a.Realloc(ph, p)
+		}
+	}
+
+	if lh != 0 {
+		pleft := buffer.Get(maxBuf)
+		defer buffer.Put(pleft)
+		left := *pleft
+		if left, err = a.Get(left, lh); err != nil {
+			return err
+		}
+
+		if err = a.Realloc(ph, p); err != nil {
+			return err
+		}
+
+		return btreeDataPage(left).concat(a, root, iroot, parent, lh, ph, parentIndex-1)
+	}
+
+	return p.concat(a, root, iroot, parent, ph, rh, parentIndex)
+}
+
+// concat must persist all changes made.
+func (p btreeDataPage) concat(a btreeStore, root, iroot, parent, ph, rh int64, parentIndex int) (err error) {
+	pright := buffer.Get(maxBuf)
+	defer buffer.Put(pright)
+	right := *pright
+	if right, err = a.Get(right, rh); err != nil {
+		return err
+	}
+
+	right, p = btreeDataPage(right).moveLeft(p, btreeDataPage(right).len())
+	nxh := btreeDataPage(right).next()
+	if nxh != 0 {
+		pnx := buffer.Get(maxBuf)
+		defer buffer.Put(pnx)
+		nx := *pnx
+		if nx, err = a.Get(nx, nxh); err != nil {
+			return err
+		}
+
+		btreeDataPage(nx).setPrev(ph)
+		if err = a.Realloc(nxh, nx); err != nil {
+			return err
+		}
+	}
+	p.setNext(nxh)
+	if err = a.Free(rh); err != nil {
+		return err
+	}
+
+	ppp := buffer.Get(maxBuf)
+	defer buffer.Put(ppp)
+	pp := *ppp
+	if pp, err = a.Get(pp, parent); err != nil {
+		return err
+	}
+
+	if btreeIndexPage(pp).len() > 1 {
+		pp = btreeIndexPage(pp).extract(parentIndex)
+		btreeIndexPage(pp).setChild(parentIndex, ph)
+		if err = a.Realloc(parent, pp); err != nil {
+			return err
+		}
+
+		return a.Realloc(ph, p)
+	}
+
+	if err = a.Free(iroot); err != nil {
+		return err
+	}
+
+	if err = a.Realloc(ph, p); err != nil {
+		return err
+	}
+
+	var b7 [7]byte
+	return a.Realloc(root, h2b(b7[:], ph))
+}
+
+// external "root" is stable and contains the real root.
+type btree int64
+
+func newBTree(a btreeStore) (btree, error) {
+	r, err := a.Alloc(zeros[:7])
+	return btree(r), err
+}
+
+func (root btree) String(a btreeStore) string {
+	pr := buffer.Get(16)
+	defer buffer.Put(pr)
+	r := *pr
+	r, err := a.Get(r, int64(root))
+	if err != nil {
+		panic(err)
+	}
+
+	iroot := b2h(r)
+	m := map[int64]bool{int64(root): true}
+
+	s := []string{fmt.Sprintf("tree %#x -> %#x\n====", root, iroot)}
+	if iroot == 0 {
+		return s[0]
+	}
+
+	var f func(int64, string)
+	f = func(h int64, ind string) {
+		if m[h] {
+			return
+		}
+
+		m[h] = true
+		pb := buffer.Get(maxBuf)
+		defer buffer.Put(pb)
+		b := btreePage(*pb)
+		var err error
+		if b, err = a.Get(b, h); err != nil {
+			panic(err)
+		}
+
+		s = append(s, fmt.Sprintf("%s@%#x", ind, h))
+		switch b.isIndex() {
+		case true:
+			da := []int64{}
+			b := btreeIndexPage(b)
+			for i := 0; i < b.len(); i++ {
+				c, d := b.child(i), b.dataPage(i)
+				s = append(s, fmt.Sprintf("%schild[%d] %#x dataPage[%d] %#x", ind, i, c, i, d))
+				da = append(da, c)
+				da = append(da, d)
+			}
+			i := b.len()
+			c := b.child(i)
+			s = append(s, fmt.Sprintf("%schild[%d] %#x", ind, i, c))
+			for _, c := range da {
+				f(c, ind+"  ")
+			}
+			f(c, ind+"  ")
+		case false:
+			b := btreeDataPage(b)
+			s = append(s, fmt.Sprintf("%sprev %#x next %#x", ind, b.prev(), b.next()))
+			for i := 0; i < b.len(); i++ {
+				k, err := b.key(a, i)
+				if err != nil {
+					panic(err)
+				}
+
+				v, err := b.value(a, i)
+				if err != nil {
+					panic(err)
+				}
+
+				s = append(s, fmt.Sprintf("%sK[%d]|% x| V[%d]|% x|", ind, i, k, i, v))
+			}
+		}
+	}
+
+	f(int64(iroot), "")
+	return strings.Join(s, "\n")
+}
+
+func (root btree) put(dst []byte, a btreeStore, c func(a, b []byte) int, key, value []byte, canOverwrite bool) (prev []byte, err error) {
+	prev, _, err = root.put2(dst, a, c, key, func(key, old []byte) (new []byte, write bool, err error) {
+		new, write = value, true
+		return
+	})
+	return
+}
+
+func (root btree) put2(dst []byte, a btreeStore, c func(a, b []byte) int, key []byte, upd func(key, old []byte) (new []byte, write bool, err error)) (old []byte, written bool, err error) {
+	var r, value []byte
+	if r, err = a.Get(dst, int64(root)); err != nil {
+		return
+	}
+
+	iroot := b2h(r)
+	var h int64
+	if iroot == 0 {
+		p := newBTreeDataPage()
+		if value, written, err = upd(key, nil); err != nil || !written {
+			return
+		}
+
+		if p, err = p.insertItem(a, 0, key, value); err != nil {
+			return
+		}
+
+		h, err = a.Alloc(p)
+		if err != nil {
+			return nil, true, err
+		}
+
+		err = a.Realloc(int64(root), h2b(r, h)[:7])
+		return
+	}
+
+	parentIndex := -1
+	var parent int64
+	ph := iroot
+
+	pp := buffer.Get(maxBuf)
+	defer buffer.Put(pp)
+	p := *pp
+
+	for {
+		if p, err = a.Get(p[:cap(p)], ph); err != nil {
+			return
+		}
+
+		var index int
+		var ok bool
+
+		if index, ok, err = btreePage(p).find(a, c, key); err != nil {
+			return
+		}
+
+		switch {
+		case ok: // Key found
+			if btreePage(p).isIndex() {
+				ph = btreeIndexPage(p).dataPage(index)
+				if p, err = a.Get(p, ph); err != nil {
+					return
+				}
+
+				if old, err = btreeDataPage(p).valueCopy(a, 0); err != nil {
+					return
+				}
+
+				if value, written, err = upd(key, old); err != nil || !written {
+					return
+				}
+
+				if _, err = btreeDataPage(p).swap(a, 0, value, true); err != nil {
+					return
+				}
+
+				err = a.Realloc(ph, p)
+				return
+			}
+
+			if old, err = btreeDataPage(p).valueCopy(a, index); err != nil {
+				return
+			}
+
+			if value, written, err = upd(key, old); err != nil || !written {
+				return
+			}
+
+			if _, err = btreeDataPage(p).swap(a, index, value, true); err != nil {
+				return
+			}
+
+			err = a.Realloc(ph, p)
+			return
+		case btreePage(p).isIndex():
+			if btreePage(p).len() > 2*kIndex {
+				if p, err = btreeIndexPage(p).split(a, root, &ph, parent, parentIndex, &index); err != nil {
+					return
+				}
+			}
+			parentIndex = index
+			parent = ph
+			ph = btreeIndexPage(p).child(index)
+		default:
+			if value, written, err = upd(key, nil); err != nil || !written {
+				return
+			}
+
+			if btreePage(p).len() < 2*kData { // page is not full
+				if p, err = btreeDataPage(p).insertItem(a, index, key, value); err != nil {
+					return
+				}
+
+				err = a.Realloc(ph, p)
+				return
+			}
+
+			// page is full
+			p, err = btreeDataPage(p).overflow(a, int64(root), ph, parent, parentIndex, index, key, value)
+			return
+		}
+	}
+}
+
+//TODO actually use 'dst' to return 'value'
+func (root btree) get(a btreeStore, dst []byte, c func(a, b []byte) int, key []byte) (b []byte, err error) {
+	var r []byte
+	if r, err = a.Get(dst, int64(root)); err != nil {
+		return
+	}
+
+	iroot := b2h(r)
+	if iroot == 0 {
+		return
+	}
+
+	ph := iroot
+
+	for {
+		var p btreePage
+		if p, err = a.Get(p, ph); err != nil {
+			return
+		}
+
+		var index int
+		var ok bool
+		if index, ok, err = p.find(a, c, key); err != nil {
+			return
+		}
+
+		switch {
+		case ok:
+			if p.isIndex() {
+				dh := btreeIndexPage(p).dataPage(index)
+				dp, err := a.Get(dst, dh)
+				if err != nil {
+					return nil, err
+				}
+
+				return btreeDataPage(dp).value(a, 0)
+			}
+
+			return btreeDataPage(p).value(a, index)
+		case p.isIndex():
+			ph = btreeIndexPage(p).child(index)
+		default:
+			return
+		}
+	}
+}
+
+//TODO actually use 'dst' to return 'value'
+func (root btree) extract(a btreeStore, dst []byte, c func(a, b []byte) int, key []byte) (value []byte, err error) {
+	var r []byte
+	if r, err = a.Get(dst, int64(root)); err != nil {
+		return
+	}
+
+	iroot := b2h(r)
+	if iroot == 0 {
+		return
+	}
+
+	ph := iroot
+	parentIndex := -1
+	var parent int64
+
+	pp := buffer.Get(maxBuf)
+	defer buffer.Put(pp)
+	p := *pp
+
+	for {
+		if p, err = a.Get(p[:cap(p)], ph); err != nil {
+			return
+		}
+
+		var index int
+		var ok bool
+		if index, ok, err = btreePage(p).find(a, c, key); err != nil {
+			return
+		}
+
+		if ok {
+			if btreePage(p).isIndex() {
+				dph := btreeIndexPage(p).dataPage(index)
+				dp, err := a.Get(dst, dph)
+				if err != nil {
+					return nil, err
+				}
+
+				if btreeDataPage(dp).len() > kData {
+					if dp, value, err = btreeDataPage(dp).extract(a, 0); err != nil {
+						return nil, err
+					}
+
+					return value, a.Realloc(dph, dp)
+				}
+
+				if btreeIndexPage(p).len() < kIndex && ph != iroot {
+					var err error
+					if p, err = btreeIndexPage(p).underflow(a, int64(root), iroot, parent, &ph, parentIndex, &index); err != nil {
+						return nil, err
+					}
+				}
+				parentIndex = index + 1
+				parent = ph
+				ph = btreeIndexPage(p).child(parentIndex)
+				continue
+			}
+
+			p, value, err = btreeDataPage(p).extract(a, index)
+			if btreePage(p).len() >= kData {
+				err = a.Realloc(ph, p)
+				return
+			}
+
+			if ph != iroot {
+				err = btreeDataPage(p).underflow(a, int64(root), iroot, parent, ph, parentIndex)
+				return
+			}
+
+			if btreePage(p).len() == 0 {
+				if err = a.Free(ph); err != nil {
+					return
+				}
+
+				err = a.Realloc(int64(root), zeros[:7])
+				return
+			}
+			err = a.Realloc(ph, p)
+			return
+		}
+
+		if !btreePage(p).isIndex() {
+			return
+		}
+
+		if btreePage(p).len() < kIndex && ph != iroot {
+			if p, err = btreeIndexPage(p).underflow(a, int64(root), iroot, parent, &ph, parentIndex, &index); err != nil {
+				return nil, err
+			}
+		}
+		parentIndex = index
+		parent = ph
+		ph = btreeIndexPage(p).child(index)
+	}
+}
+
+func (root btree) deleteAny(a btreeStore) (bool, error) {
+	pr := buffer.Get(7)
+	defer buffer.Put(pr)
+	r := *pr
+	var err error
+	if r, err = a.Get(r, int64(root)); err != nil {
+		return false, err
+	}
+
+	iroot := b2h(r)
+	if iroot == 0 {
+		return true, nil
+	}
+
+	ph := iroot
+	parentIndex := -1
+	var parent int64
+	pp := buffer.Get(maxBuf)
+	defer buffer.Put(pp)
+	p := *pp
+
+	for {
+		if p, err = a.Get(p, ph); err != nil {
+			return false, err
+		}
+
+		index := btreePage(p).len() / 2
+		if btreePage(p).isIndex() {
+			dph := btreeIndexPage(p).dataPage(index)
+			pdp := buffer.Get(maxBuf)
+			defer buffer.Put(pdp)
+			dp := *pdp
+			if dp, err = a.Get(dp, dph); err != nil {
+				return false, err
+			}
+
+			if btreeDataPage(dp).len() > kData {
+				if dp, _, err = btreeDataPage(dp).extract(a, 0); err != nil {
+					return false, err
+				}
+
+				return false, a.Realloc(dph, dp)
+			}
+
+			if btreeIndexPage(p).len() < kIndex && ph != iroot {
+				if p, err = btreeIndexPage(p).underflow(a, int64(root), iroot, parent, &ph, parentIndex, &index); err != nil {
+					return false, err
+				}
+			}
+			parentIndex = index + 1
+			parent = ph
+			ph = btreeIndexPage(p).child(parentIndex)
+			continue
+		}
+
+		p, _, err = btreeDataPage(p).extract(a, index)
+		if btreePage(p).len() >= kData {
+			err = a.Realloc(ph, p)
+			return false, err
+		}
+
+		if ph != iroot {
+			err = btreeDataPage(p).underflow(a, int64(root), iroot, parent, ph, parentIndex)
+			return false, err
+		}
+
+		if btreePage(p).len() == 0 {
+			if err = a.Free(ph); err != nil {
+				return true, err
+			}
+
+			return true, a.Realloc(int64(root), zeros[:7])
+		}
+
+		return false, a.Realloc(ph, p)
+	}
+}
+
+func (root btree) first(a btreeStore) (ph int64, p btreeDataPage, err error) {
+	pr := buffer.Get(7)
+	defer buffer.Put(pr)
+	r := *pr
+	if r, err = a.Get(r, int64(root)); err != nil {
+		return
+	}
+
+	for ph = b2h(r); ph != 0; ph = btreeIndexPage(p).child(0) {
+		if p, err = a.Get(p, ph); err != nil {
+			return
+		}
+
+		if !btreePage(p).isIndex() {
+			break
+		}
+	}
+
+	return
+}
+
+func (root btree) last(a btreeStore) (ph int64, p btreeDataPage, err error) {
+	pr := buffer.Get(7)
+	defer buffer.Put(pr)
+	r := *pr
+	if r, err = a.Get(r, int64(root)); err != nil {
+		return
+	}
+
+	for ph = b2h(r); ph != 0; ph = btreeIndexPage(p).child(btreeIndexPage(p).len()) {
+		if p, err = a.Get(p, ph); err != nil {
+			return
+		}
+
+		if !btreePage(p).isIndex() {
+			break
+		}
+	}
+
+	return
+}
+
+// key >= p[index].key
+func (root btree) seek(a btreeStore, c func(a, b []byte) int, key []byte) (p btreeDataPage, index int, equal bool, err error) {
+	pr := buffer.Get(7)
+	defer buffer.Put(pr)
+	r := *pr
+	if r, err = a.Get(r, int64(root)); err != nil {
+		return
+	}
+
+	for ph := b2h(r); ph != 0; ph = btreeIndexPage(p).child(index) {
+		if p, err = a.Get(p, ph); err != nil {
+			break
+		}
+
+		if index, equal, err = btreePage(p).find(a, c, key); err != nil {
+			break
+		}
+
+		if equal {
+			if !btreePage(p).isIndex() {
+				break
+			}
+
+			p, err = a.Get(p, btreeIndexPage(p).dataPage(index))
+			index = 0
+			break
+		}
+
+		if !btreePage(p).isIndex() {
+			break
+		}
+	}
+	return
+}
+
+func (root btree) clear(a btreeStore) (err error) {
+	pr := buffer.Get(7)
+	defer buffer.Put(pr)
+	r := *pr
+	if r, err = a.Get(r, int64(root)); err != nil {
+		return
+	}
+
+	iroot := b2h(r)
+	if iroot == 0 {
+		return
+	}
+
+	if err = root.clear2(a, iroot); err != nil {
+		return
+	}
+
+	var b [7]byte
+	return a.Realloc(int64(root), b[:])
+}
+
+func (root btree) clear2(a btreeStore, ph int64) (err error) {
+	pp := buffer.Get(maxBuf)
+	defer buffer.Put(pp)
+	p := *pp
+	if p, err = a.Get(p, ph); err != nil {
+		return
+	}
+
+	switch btreePage(p).isIndex() {
+	case true:
+		ip := btreeIndexPage(p)
+		for i := 0; i <= ip.len(); i++ {
+			root.clear2(a, ip.child(i))
+
+		}
+	case false:
+		dp := btreeDataPage(p)
+		for i := 0; i < dp.len(); i++ {
+			if err = dp.setKey(a, i, nil); err != nil {
+				return
+			}
+
+			if err = dp.setValue(a, i, nil); err != nil {
+				return
+			}
+		}
+	}
+	return a.Free(ph)
+}

+ 170 - 0
vendor/github.com/cznic/lldb/errors.go

@@ -0,0 +1,170 @@
+// Copyright 2014 The lldb Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Some errors returned by this package.
+//
+// Note that this package can return more errors than declared here, for
+// example io.EOF from Filer.ReadAt().
+
+package lldb
+
+import (
+	"fmt"
+)
+
+// ErrDecodeScalars is possibly returned from DecodeScalars
+type ErrDecodeScalars struct {
+	B []byte // Data being decoded
+	I int    // offending offset
+}
+
+// Error implements the built in error type.
+func (e *ErrDecodeScalars) Error() string {
+	return fmt.Sprintf("DecodeScalars: corrupted data @ %d/%d", e.I, len(e.B))
+}
+
+// ErrINVAL reports invalid values passed as parameters, for example negative
+// offsets where only non-negative ones are allowed, or values read from the DB.
+type ErrINVAL struct {
+	Src string
+	Val interface{}
+}
+
+// Error implements the built in error type.
+func (e *ErrINVAL) Error() string {
+	return fmt.Sprintf("%s: %+v", e.Src, e.Val)
+}
+
+// ErrPERM is for example reported when a Filer is closed while BeginUpdate(s)
+// are not balanced with EndUpdate(s)/Rollback(s) or when EndUpdate or Rollback
+// is invoked which is not paired with a BeginUpdate.
+type ErrPERM struct {
+	Src string
+}
+
+// Error implements the built in error type.
+func (e *ErrPERM) Error() string {
+	return fmt.Sprintf("%s: Operation not permitted", string(e.Src))
+}
+
+// ErrType represents an ErrILSEQ kind.
+type ErrType int
+
+// ErrILSEQ types
+const (
+	ErrOther ErrType = iota
+
+	ErrAdjacentFree          // Adjacent free blocks (.Off and .Arg)
+	ErrDecompress            // Used compressed block: corrupted compression
+	ErrExpFreeTag            // Expected a free block tag, got .Arg
+	ErrExpUsedTag            // Expected a used block tag, got .Arg
+	ErrFLT                   // Free block is invalid or referenced multiple times
+	ErrFLTLoad               // FLT truncated to .Off, need size >= .Arg
+	ErrFLTSize               // Free block size (.Arg) doesn't belong to its list min size: .Arg2
+	ErrFileSize              // File .Name size (.Arg) != 0 (mod 16)
+	ErrFreeChaining          // Free block, .prev.next doesn't point back to this block
+	ErrFreeTailBlock         // Last block is free
+	ErrHead                  // Head of a free block list has non zero Prev (.Arg)
+	ErrInvalidRelocTarget    // Reloc doesn't target (.Arg) a short or long used block
+	ErrInvalidWAL            // Corrupted write ahead log. .Name: file name, .More: more
+	ErrLongFreeBlkTooLong    // Long free block spans beyond EOF, size .Arg
+	ErrLongFreeBlkTooShort   // Long free block must have at least 2 atoms, got only .Arg
+	ErrLongFreeNextBeyondEOF // Long free block .Next (.Arg) spans beyond EOF
+	ErrLongFreePrevBeyondEOF // Long free block .Prev (.Arg) spans beyond EOF
+	ErrLongFreeTailTag       // Expected a long free block tail tag, got .Arg
+	ErrLostFreeBlock         // Free block is not in any FLT list
+	ErrNullReloc             // Used reloc block with nil target
+	ErrRelocBeyondEOF        // Used reloc points (.Arg) beyond EOF
+	ErrShortFreeTailTag      // Expected a short free block tail tag, got .Arg
+	ErrSmall                 // Request for a free block (.Arg) returned a too small one (.Arg2) at .Off
+	ErrTailTag               // Block at .Off has invalid tail CC (compression code) tag, got .Arg
+	ErrUnexpReloc            // Unexpected reloc block referred to from reloc block .Arg
+	ErrVerifyPadding         // Used block has nonzero padding
+	ErrVerifyTailSize        // Long free block size .Arg but tail size .Arg2
+	ErrVerifyUsedSpan        // Used block size (.Arg) spans beyond EOF
+)
+
+// ErrILSEQ reports a corrupted file format. Details in fields according to Type.
+type ErrILSEQ struct {
+	Type ErrType
+	Off  int64
+	Arg  int64
+	Arg2 int64
+	Arg3 int64
+	Name string
+	More interface{}
+}
+
+// Error implements the built in error type.
+func (e *ErrILSEQ) Error() string {
+	switch e.Type {
+	case ErrAdjacentFree:
+		return fmt.Sprintf("Adjacent free blocks at offset %#x and %#x", e.Off, e.Arg)
+	case ErrDecompress:
+		return fmt.Sprintf("Compressed block at offset %#x: Corrupted compressed content", e.Off)
+	case ErrExpFreeTag:
+		return fmt.Sprintf("Block at offset %#x: Expected a free block tag, got %#2x", e.Off, e.Arg)
+	case ErrExpUsedTag:
+		return fmt.Sprintf("Block at ofset %#x: Expected a used block tag, got %#2x", e.Off, e.Arg)
+	case ErrFLT:
+		return fmt.Sprintf("Free block at offset %#x is invalid or referenced multiple times", e.Off)
+	case ErrFLTLoad:
+		return fmt.Sprintf("FLT truncated to size %d, expected at least %d", e.Off, e.Arg)
+	case ErrFLTSize:
+		return fmt.Sprintf("Free block at offset %#x has size (%#x) should be at least (%#x)", e.Off, e.Arg, e.Arg2)
+	case ErrFileSize:
+		return fmt.Sprintf("File %q size (%#x) != 0 (mod 16)", e.Name, e.Arg)
+	case ErrFreeChaining:
+		return fmt.Sprintf("Free block at offset %#x: .prev.next doesn point back here.", e.Off)
+	case ErrFreeTailBlock:
+		return fmt.Sprintf("Free block at offset %#x: Cannot be last file block", e.Off)
+	case ErrHead:
+		return fmt.Sprintf("Block at offset %#x: Head of free block list has non zero .prev %#x", e.Off, e.Arg)
+	case ErrInvalidRelocTarget:
+		return fmt.Sprintf("Used reloc block at offset %#x: Target (%#x) is not a short or long used block", e.Off, e.Arg)
+	case ErrInvalidWAL:
+		return fmt.Sprintf("Corrupted write ahead log file: %q %v", e.Name, e.More)
+	case ErrLongFreeBlkTooLong:
+		return fmt.Sprintf("Long free block at offset %#x: Size (%#x) beyond EOF", e.Off, e.Arg)
+	case ErrLongFreeBlkTooShort:
+		return fmt.Sprintf("Long free block at offset %#x: Size (%#x) too small", e.Off, e.Arg)
+	case ErrLongFreeNextBeyondEOF:
+		return fmt.Sprintf("Long free block at offset %#x: Next (%#x) points beyond EOF", e.Off, e.Arg)
+	case ErrLongFreePrevBeyondEOF:
+		return fmt.Sprintf("Long free block at offset %#x: Prev (%#x) points beyond EOF", e.Off, e.Arg)
+	case ErrLongFreeTailTag:
+		return fmt.Sprintf("Block at offset %#x: Expected long free tail tag, got %#2x", e.Off, e.Arg)
+	case ErrLostFreeBlock:
+		return fmt.Sprintf("Free block at offset %#x: not in any FLT list", e.Off)
+	case ErrNullReloc:
+		return fmt.Sprintf("Used reloc block at offset %#x: Nil target", e.Off)
+	case ErrRelocBeyondEOF:
+		return fmt.Sprintf("Used reloc block at offset %#x: Link (%#x) points beyond EOF", e.Off, e.Arg)
+	case ErrShortFreeTailTag:
+		return fmt.Sprintf("Block at offset %#x: Expected short free tail tag, got %#2x", e.Off, e.Arg)
+	case ErrSmall:
+		return fmt.Sprintf("Request for of free block of size %d returned a too small (%d) one at offset %#x", e.Arg, e.Arg2, e.Off)
+	case ErrTailTag:
+		return fmt.Sprintf("Block at offset %#x: Invalid tail CC tag, got %#2x", e.Off, e.Arg)
+	case ErrUnexpReloc:
+		return fmt.Sprintf("Block at offset %#x: Unexpected reloc block. Referred to from reloc block at offset %#x", e.Off, e.Arg)
+	case ErrVerifyPadding:
+		return fmt.Sprintf("Used block at offset %#x: Nonzero padding", e.Off)
+	case ErrVerifyTailSize:
+		return fmt.Sprintf("Long free block at offset %#x: Size %#x, but tail size %#x", e.Off, e.Arg, e.Arg2)
+	case ErrVerifyUsedSpan:
+		return fmt.Sprintf("Used block at offset %#x: Size %#x spans beyond EOF", e.Off, e.Arg)
+	}
+
+	more := ""
+	if e.More != nil {
+		more = fmt.Sprintf(", %v", e.More)
+	}
+	off := ""
+	if e.Off != 0 {
+		off = fmt.Sprintf(", off: %#x", e.Off)
+	}
+
+	return fmt.Sprintf("Error%s%s", off, more)
+}

+ 1999 - 0
vendor/github.com/cznic/lldb/falloc.go

@@ -0,0 +1,1999 @@
+// Copyright 2014 The lldb Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The storage space management.
+
+package lldb
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"sort"
+	"strings"
+	"sync"
+
+	"github.com/cznic/internal/buffer"
+	"github.com/cznic/mathutil"
+	"github.com/cznic/zappy"
+)
+
+const (
+	maxBuf = maxRq + 20
+)
+
+// Options are passed to NewAllocator to amend some configuration.  The
+// compatibility promise is the same as for struct types in the Go standard
+// library - changes can be introduced only by adding new exported fields,
+// which is backward compatible as long as client code uses field names
+// when assigning values in literals of the imported struct types.
+//
+// NOTE: No options are currently defined.
+type Options struct{}
+
+// AllocStats records statistics about a Filer. It can be optionally filled by
+// Allocator.Verify, if successful.
+type AllocStats struct {
+	Handles     int64           // total valid handles in use
+	Compression int64           // number of compressed blocks
+	TotalAtoms  int64           // total number of atoms == AllocAtoms + FreeAtoms
+	AllocBytes  int64           // bytes allocated (after decompression, if/where used)
+	AllocAtoms  int64           // atoms allocated/used, including relocation atoms
+	Relocations int64           // number of relocated used blocks
+	FreeAtoms   int64           // atoms unused
+	AllocMap    map[int64]int64 // allocated block size in atoms -> count of such blocks
+	FreeMap     map[int64]int64 // free block size in atoms -> count of such blocks
+}
+
+/*
+
+Allocator implements "raw" storage space management (allocation and
+deallocation) for a low level of a DB engine.  The storage is an abstraction
+provided by a Filer.
+
+The terms MUST or MUST NOT, if/where used in the documentation of Allocator,
+written in all caps as seen here, are a requirement for any possible
+alternative implementations aiming for compatibility with this one.
+
+Filer file
+
+A Filer file, or simply 'file', is a linear, contiguous sequence of blocks.
+Blocks may be either free (currently unused) or allocated (currently used).
+Some blocks may eventually become virtual in the sense that they may not be
+realized in the storage (sparse files).
+
+Free Lists Table
+
+The file starts with a FLT. This table records the heads of 14 doubly linked
+free lists. The zero based index (I) determines the minimal size of free
+blocks in that list, except for the last one, which registers free blocks of
+size 4112+ atoms:
+
+	MinSize == 2^I
+
+	For example 0 -> 1, 1 -> 2, ... 12 -> 4096.
+
+Each entry in the FLT is 8 bytes in network byte order, MSB MUST be zero, ie.
+the slot value is effectively only 7 bytes. The value is the handle of the head
+of the respective doubly linked free list. The FLT size is 14*8 == 112(0x70)
+bytes. If the free blocks list for any particular size is empty, the respective
+FLT slot is zero. Sizes of free blocks in one list MUST NOT overlap with sizes
+of free blocks in any other list. For example, even though a free block of size
+2 technically is of minimal size >= 1, it MUST NOT be put in the list for slot
+0 (minimal size 1), but in slot 1 (minimal size 2).
+
+	slot 0:		sizes [1, 2)
+	slot 1:		sizes [2, 4)
+	slot 2:		sizes [4, 8)
+	...
+	slot 11:	sizes [2048, 4096)
+	slot 12:	sizes [4096, 4112)
+	slot 13:	sizes [4112, inf)
+
+The last FLT slot collects all free blocks bigger than its minimal size. That
+still respects the 'no overlap' invariant.
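+
+A sketch of the slot selection the table above implies (illustrative only; the
+allocator's internal free list table type is the authoritative implementation):
+
+	// fltSlot returns the FLT slot index for a free block of the given
+	// size in atoms, following the size ranges listed above.
+	func fltSlot(atoms int64) int {
+		switch {
+		case atoms >= 4112:
+			return 13
+		case atoms >= 4096:
+			return 12
+		}
+		for i := 11; i >= 1; i-- {
+			if atoms >= 1<<uint(i) {
+				return i
+			}
+		}
+		return 0
+	}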
+
+File blocks
+
+A block is a linear, contiguous sequence of atoms. The first and last atoms of
+a block provide information about, for example, whether the block is free or
+used, what is the size of the block, etc.  Details are discussed elsewhere. The
+first block of a file starts immediately after FLT, ie. at file offset
+112(0x70).
+
+Block atoms
+
+An atom is a fixed size piece of a block (and thus of a file too); it is 16
+bytes long. A consequence is that for a valid file:
+
+ filesize == 0 (mod 16)
+
+The first atom of the first block is considered to be atom #1.
+
+Block handles
+
+A handle is an integer referring to a block. The reference is the number of the
+atom the block starts with. Put another way:
+
+ handle == offset/16 - 6
+ offset == 16 * (handle + 6)
+
+`offset` is the offset of the first byte of the block, measured in bytes
+- as in fseek(3). Handle has type `int64`, but only the lower 7 bytes may be
+nonzero while referring to a block, both in code as well as when persisted in
+the file's internal bookkeeping structures - see 'Block types' below. So a
+handle is effectively only `uint56`.  This also means that the maximum usable
+size of a file is 2^56 atoms.  That is 2^60 bytes == 1 exabyte (10^18 bytes).
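+
+For example, the first block of a file, which starts right after the 112 byte
+FLT, has handle 1: offset == 16 * (1 + 6) == 112 and, conversely,
+112/16 - 6 == 1.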
+
+Nil handles
+
+A handle with numeric value of '0' refers to no block.
+
+Zero padding
+
+Padding is used to round up a block size to a whole number of atoms. Any
+padding, if present, MUST be all zero bytes. Note that the size of padding is
+in [0, 15].
+
+Content wiping
+
+When a block is deallocated, its data content is not wiped as the added
+overhead may be substantial while not necessarily needed. Client code should
+however overwrite the content of any block having sensitive data with eg. zeros
+(good compression) - before deallocating the block.
+
+Block tags
+
+Every block is tagged in its first byte (a head tag) and last byte (tail tag).
+Block types are:
+
+ 1. Short content used block (head tags 0x00-0xFB)
+ 2. Long content used block (head tag 0xFC)
+ 3. Relocated used block (head tag 0xFD)
+ 4. Short, single atom, free block (head tag 0xFE)
+ 5. Long free block (head tag 0xFF)
+
+Note: Relocated used block, 3. above (head tag 0xFD) MUST NOT refer to blocks
+other than 1. or 2. above (head tags 0x00-0xFC).
+
+Content blocks
+
+For used blocks (head tags 0x00-0xFC), the tail tag distinguishes used from
+unused blocks and whether the content is compressed or not.
+
+Content compression
+
+The tail flag of a used block is one of
+
+	CC == 0 // Content is not compressed.
+	CC == 1 // Content is in zappy compression format.
+
+If compression of written content is enabled, there are two cases: If
+compressed size < original size then the compressed content should be written
+if it will save at least one atom of the block. If compressed size >= original
+size then the compressed content should not be used.
+
+It's recommended to use compression. For example, the BTrees implementation
+assumes compression is used. Using compression may cause a slowdown in some
+cases, while in other cases it may cause a speedup.
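+
+A sketch of that decision, assuming the package's internal n2atoms helper
+(used later in this file), which maps a content length to the block size in
+atoms (illustrative only):
+
+	// useCompressed reports whether writing the compressed form would
+	// save at least one atom compared to the uncompressed form.
+	func useCompressed(originalLen, compressedLen int) bool {
+		return compressedLen < originalLen && n2atoms(compressedLen) < n2atoms(originalLen)
+	}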
+
+Short content block
+
+Short content block carries content of length between N == 0(0x00) and N ==
+251(0xFB) bytes.
+
+	|<-first atom start  ...  last atom end->|
+	+---++--   ...   --+--   ...   --++------+
+	| 0 ||    1...     |  0x*...0x*E || 0x*F |
+	+---++--   ...   --+--   ...   --++------+
+	| N ||   content   |   padding   ||  CC  |
+	+---++--   ...   --+--   ...   --++------+
+
+	A == (N+1)/16 + 1        // The number of atoms in the block [1, 16]
+	padding == 15 - (N+1)%16 // Length of the zero padding
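+
+	For example (integer division), N == 251 gives A == 252/16 + 1 == 16
+	atoms and padding == 15 - 252%16 == 3 zero bytes.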
+
+Long content block
+
+Long content block carries content of length between N == 252(0xFC) and N ==
+65787(0x100FB) bytes.
+
+	|<-first atom start    ...     last atom end->|
+	+------++------+-- ... --+--  ...   --++------+
+	|  0   || 1..2 |   3...  | 0x*...0x*E || 0x*F |
+	+------++------+-- ... --+--  ...   --++------+
+	| 0xFC ||  M   | content |  padding   ||  CC  |
+	+------++------+-- ... --+--  ...   --++------+
+
+	A == (N+3)/16 + 1        // The number of atoms in the block [16, 4112]
+	M == N % 0x10000         // Stored as 2 bytes in network byte order
+	padding == 15 - (N+3)%16 // Length of the zero padding
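+
+	For example, N == 65787 (0x100FB) gives A == 65790/16 + 1 == 4112 atoms,
+	M == 65787 % 0x10000 == 0xFB, and padding == 15 - 65790%16 == 1 zero byte.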
+
+Relocated used block
+
+Relocated block allows to permanently assign a handle to some content and
+resize the content anytime afterwards without having to update all the possible
+existing references; the handle can be constant while the content size may be
+dynamic. When relocating a block, any space left by the original block content,
+above this single atom block, MUST be reclaimed.
+
+Relocations MUST point only to a used short or long block == blocks with tags
+0x00...0xFC.
+
+	+------++------+---------++----+
+	|  0   || 1..7 | 8...14  || 15 |
+	+------++------+---------++----+
+	| 0xFD ||  H   | padding || 0  |
+	+------++------+---------++----+
+
+H is the handle of the relocated block in network byte order.
+
+Free blocks
+
+Free blocks are the result of space deallocation. Free blocks are organized in
+one or more doubly linked lists, abstracted by the FLT interface. Free blocks
+MUST be "registered" by putting them in such list. Allocator MUST reuse a big
+enough free block, if such exists, before growing the file size. When a free
+block is created by deallocation or reallocation it MUST be joined with any
+adjacently existing free blocks before "registering". If the resulting free
+block is now a last block of a file, the free block MUST be discarded and the
+file size MUST be truncated accordingly instead. Put differently, there MUST
+NOT ever be a free block at the file end.
+
+A single free atom
+
+Is an unused block of size 1 atom.
+
+	+------++------+--------++------+
+	|  0   || 1..7 | 8...14 ||  15  |
+	+------++------+--------++------+
+	| 0xFE ||  P   |   N    || 0xFE |
+	+------++------+--------++------+
+
+P and N, stored in network byte order, are the previous and next free block
+handles in the doubly linked list to which this free block belongs.
+
+A long unused block
+
+Is an unused block of size > 1 atom.
+
+	+------++------+-------+---------+- ... -+----------++------+
+	|  0   || 1..7 | 8..14 | 15...21 |       | Z-7..Z-1 ||  Z   |
+	+------++------+-------+---------+- ... -+----------++------+
+	| 0xFF ||  S   |   P   |    N    | Leak  |    S     || 0xFF |
+	+------++------+-------+---------+- ... -+----------++------+
+
+	Z == 16 * S - 1
+
+S is the size of this unused block in atoms. P and N are the previous and next
+free block handles in the doubly linked list to which this free block belongs.
+Leak contains any data the block had before deallocating this block.  See also
+the subtitle 'Content wiping' above. S, P and N are stored in network byte
+order. Large free blocks may trigger a consideration of file hole punching of
+the Leak field - for some value of 'large'.
+
+Note: Allocator methods vs CRUD[1]:
+
+	Alloc	[C]reate
+	Get	[R]ead
+	Realloc	[U]pdate
+	Free	[D]elete
+
+Note: No Allocator method returns io.EOF.
+
+  [1]: http://en.wikipedia.org/wiki/Create,_read,_update_and_delete
+
+*/
+type Allocator struct {
+	f        Filer
+	flt      flt
+	Compress bool // enables content compression
+	cache    cache
+	m        map[int64]*node
+	lru      lst
+	expHit   int64
+	expMiss  int64
+	cacheSz  int
+	hit      uint16
+	miss     uint16
+	mu       sync.Mutex
+}
+
+// NewAllocator returns a new Allocator. To open an existing file, pass its
+// Filer. To create a "new" file, pass a Filer whose file is of zero size.
+func NewAllocator(f Filer, opts *Options) (a *Allocator, err error) {
+	if opts == nil { // Enforce *Options is always passed
+		return nil, errors.New("NewAllocator: nil opts passed")
+	}
+
+	a = &Allocator{
+		f:       f,
+		cacheSz: 10,
+	}
+
+	a.cinit()
+	switch x := f.(type) {
+	case *RollbackFiler:
+		x.afterRollback = func() error {
+			a.cinit()
+			return a.flt.load(a.f, 0)
+		}
+	case *ACIDFiler0:
+		x.RollbackFiler.afterRollback = func() error {
+			a.cinit()
+			return a.flt.load(a.f, 0)
+		}
+	}
+
+	sz, err := f.Size()
+	if err != nil {
+		return
+	}
+
+	a.flt.init()
+	if sz == 0 {
+		var b [fltSz]byte
+		if err = a.f.BeginUpdate(); err != nil {
+			return
+		}
+
+		if _, err = f.WriteAt(b[:], 0); err != nil {
+			a.f.Rollback()
+			return
+		}
+
+		return a, a.f.EndUpdate()
+	}
+
+	return a, a.flt.load(f, 0)
+}
+
+// CacheStats reports cache statistics.
+//
+//TODO return a struct perhaps.
+func (a *Allocator) CacheStats() (buffersUsed, buffersTotal int, bytesUsed, bytesTotal, hits, misses int64) {
+	buffersUsed = len(a.m)
+	buffersTotal = buffersUsed + len(a.cache)
+	bytesUsed = a.lru.size()
+	bytesTotal = bytesUsed + a.cache.size()
+	hits = a.expHit
+	misses = a.expMiss
+	return
+}
+
+func (a *Allocator) cinit() {
+	for h, n := range a.m {
+		a.cache.put(a.lru.remove(n))
+		delete(a.m, h)
+	}
+	if a.m == nil {
+		a.m = map[int64]*node{}
+	}
+}
+
+func (a *Allocator) cadd(b []byte, h int64) {
+	if len(a.m) < a.cacheSz {
+		n := a.cache.get(len(b))
+		n.h = h
+		copy(n.b, b)
+		a.m[h] = a.lru.pushFront(n)
+		return
+	}
+
+	// cache full
+	delete(a.m, a.cache.put(a.lru.removeBack()).h)
+	n := a.cache.get(len(b))
+	n.h = h
+	copy(n.b, b)
+	a.m[h] = a.lru.pushFront(n)
+	return
+}
+
+func (a *Allocator) cfree(h int64) {
+	n, ok := a.m[h]
+	if !ok { // must have been evicted
+		return
+	}
+
+	a.cache.put(a.lru.remove(n))
+	delete(a.m, h)
+}
+
+// Alloc allocates storage space for b and returns the handle of the new block
+// with content set to b or an error, if any. The returned handle is valid only
+// while the block is used - until the block is deallocated. No two valid
+// handles share the same value within the same Filer, but any value of a
+// handle not referring to any used block may become valid any time as a result
+// of Alloc.
+//
+// Invoking Alloc on an empty Allocator is guaranteed to return a handle with
+// value 1. The intended use of content of handle 1 is a root "directory" of
+// other data held by an Allocator.
+//
+// Passing handles not obtained initially from Alloc or not anymore valid to
+// any other Allocator methods can result in an irreparably corrupted database.
+func (a *Allocator) Alloc(b []byte) (handle int64, err error) {
+	pbuf := buffer.Get(zappy.MaxEncodedLen(len(b)))
+	defer buffer.Put(pbuf)
+	buf := *pbuf
+	buf, _, cc, err := a.makeUsedBlock(buf, b)
+	if err != nil {
+		return
+	}
+
+	if handle, err = a.alloc(buf, cc); err == nil {
+		a.cadd(b, handle)
+	}
+	return
+}
+
+func (a *Allocator) alloc(b []byte, cc byte) (h int64, err error) {
+	rqAtoms := n2atoms(len(b))
+	if h = a.flt.find(rqAtoms); h == 0 { // must grow
+		var sz int64
+		if sz, err = a.f.Size(); err != nil {
+			return
+		}
+
+		h = off2h(sz)
+		err = a.writeUsedBlock(h, cc, b)
+		return
+	}
+
+	// Handle is the first item of a free blocks list.
+	tag, s, prev, next, err := a.nfo(h)
+	if err != nil {
+		return
+	}
+
+	if tag != tagFreeShort && tag != tagFreeLong {
+		err = &ErrILSEQ{Type: ErrExpFreeTag, Off: h2off(h), Arg: int64(tag)}
+		return
+	}
+
+	if prev != 0 {
+		err = &ErrILSEQ{Type: ErrHead, Off: h2off(h), Arg: prev}
+		return
+	}
+
+	if s < int64(rqAtoms) {
+		err = &ErrILSEQ{Type: ErrSmall, Arg: int64(rqAtoms), Arg2: s, Off: h2off(h)}
+		return
+	}
+
+	if err = a.unlink(h, s, prev, next); err != nil {
+		return
+	}
+
+	if s > int64(rqAtoms) {
+		freeH := h + int64(rqAtoms)
+		freeAtoms := s - int64(rqAtoms)
+		if err = a.link(freeH, freeAtoms); err != nil {
+			return
+		}
+	}
+	return h, a.writeUsedBlock(h, cc, b)
+}
+
+// Free deallocates the block referred to by handle or returns an error, if
+// any.
+//
+// After Free succeeds, handle is invalid and must not be used.
+//
+// Handle must have been obtained initially from Alloc and must be still valid,
+// otherwise a database may get irreparably corrupted.
+func (a *Allocator) Free(handle int64) (err error) {
+	if handle <= 0 || handle > maxHandle {
+		return &ErrINVAL{"Allocator.Free: handle out of limits", handle}
+	}
+
+	a.cfree(handle)
+	return a.free(handle, 0, true)
+}
+
+func (a *Allocator) free(h, from int64, acceptRelocs bool) (err error) {
+	tag, atoms, _, n, err := a.nfo(h)
+	if err != nil {
+		return
+	}
+
+	switch tag {
+	default:
+		// nop
+	case tagUsedLong:
+		// nop
+	case tagUsedRelocated:
+		if !acceptRelocs {
+			return &ErrILSEQ{Type: ErrUnexpReloc, Off: h2off(h), Arg: h2off(from)}
+		}
+
+		if err = a.free(n, h, false); err != nil {
+			return
+		}
+	case tagFreeShort, tagFreeLong:
+		return &ErrINVAL{"Allocator.Free: attempt to free a free block at off", h2off(h)}
+	}
+
+	return a.free2(h, atoms)
+}
+
+func (a *Allocator) free2(h, atoms int64) (err error) {
+	sz, err := a.f.Size()
+	if err != nil {
+		return
+	}
+
+	ltag, latoms, lp, ln, err := a.leftNfo(h)
+	if err != nil {
+		return
+	}
+
+	if ltag != tagFreeShort && ltag != tagFreeLong {
+		latoms = 0
+	}
+
+	var rtag byte
+	var ratoms, rp, rn int64
+
+	isTail := h2off(h)+atoms*16 == sz
+	if !isTail {
+		if rtag, ratoms, rp, rn, err = a.nfo(h + atoms); err != nil {
+			return
+		}
+	}
+
+	if rtag != tagFreeShort && rtag != tagFreeLong {
+		ratoms = 0
+	}
+
+	switch {
+	case latoms == 0 && ratoms == 0:
+		// -> isolated <-
+		if isTail { // cut tail
+			return a.f.Truncate(h2off(h))
+		}
+
+		return a.link(h, atoms)
+	case latoms == 0 && ratoms != 0:
+		// right join ->
+		if err = a.unlink(h+atoms, ratoms, rp, rn); err != nil {
+			return
+		}
+
+		return a.link(h, atoms+ratoms)
+	case latoms != 0 && ratoms == 0:
+		// <- left join
+		if err = a.unlink(h-latoms, latoms, lp, ln); err != nil {
+			return
+		}
+
+		if isTail {
+			return a.f.Truncate(h2off(h - latoms))
+		}
+
+		return a.link(h-latoms, latoms+atoms)
+	}
+
+	// case latoms != 0 && ratoms != 0:
+	// <- middle join ->
+	lh, rh := h-latoms, h+atoms
+	if err = a.unlink(lh, latoms, lp, ln); err != nil {
+		return
+	}
+
+	// Prev unlink may have invalidated rp or rn
+	if _, _, rp, rn, err = a.nfo(rh); err != nil {
+		return
+	}
+
+	if err = a.unlink(rh, ratoms, rp, rn); err != nil {
+		return
+	}
+
+	return a.link(h-latoms, latoms+atoms+ratoms)
+}
+
+// Add a free block h to the appropriate free list
+func (a *Allocator) link(h, atoms int64) (err error) {
+	if err = a.makeFree(h, atoms, 0, a.flt.head(atoms)); err != nil {
+		return
+	}
+
+	return a.flt.setHead(h, atoms, a.f)
+}
+
+// Remove free block h from the free list
+func (a *Allocator) unlink(h, atoms, p, n int64) (err error) {
+	switch {
+	case p == 0 && n == 0:
+		// single item list, must be head
+		return a.flt.setHead(0, atoms, a.f)
+	case p == 0 && n != 0:
+		// head of list (has next item[s])
+		if err = a.prev(n, 0); err != nil {
+			return
+		}
+
+		// new head
+		return a.flt.setHead(n, atoms, a.f)
+	case p != 0 && n == 0:
+		// last item in list
+		return a.next(p, 0)
+	}
+	// case p != 0 && n != 0:
+	// intermediate item in a list
+	if err = a.next(p, n); err != nil {
+		return
+	}
+
+	return a.prev(n, p)
+}
+
+//TODO remove ?
+// Return len(slice) == n, reuse src if possible.
+func need(n int, src []byte) []byte {
+	if cap(src) < n {
+		return *buffer.Get(n)
+	}
+
+	return src[:n]
+}
+
+// Get returns the data content of a block referred to by handle or an error if
+// any.  The returned slice may be a sub-slice of buf if buf was large enough
+// to hold the entire content.  Otherwise, a newly allocated slice will be
+// returned.  It is valid to pass a nil buf.
+//
+// If the content was stored using compression then it is transparently
+// returned decompressed.
+//
+// Handle must have been obtained initially from Alloc and must be still valid,
+// otherwise invalid data may be returned without detecting the error.
+//
+// Get is safe for concurrent access by multiple goroutines iff no other
+// goroutine mutates the DB.
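+//
+// A typical call looks like this (illustrative; buf may also be a reusable
+// slice instead of nil):
+//
+//	b, err := a.Get(nil, handle) // b holds the block's (decompressed) content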
+func (a *Allocator) Get(buf []byte, handle int64) (b []byte, err error) {
+	buf = buf[:cap(buf)]
+	a.mu.Lock() // X1+
+	if n, ok := a.m[handle]; ok {
+		a.lru.moveToFront(n)
+		b = need(len(n.b), buf)
+		copy(b, n.b)
+		a.expHit++
+		a.hit++
+		a.mu.Unlock() // X1-
+		return
+	}
+
+	a.expMiss++
+	a.miss++
+	if a.miss > 10 && len(a.m) < 500 {
+		if 100*a.hit/a.miss < 95 {
+			a.cacheSz++
+		}
+		a.hit, a.miss = 0, 0
+	}
+	a.mu.Unlock() // X1-
+
+	defer func(h int64) {
+		if err == nil {
+			a.mu.Lock() // X2+
+			a.cadd(b, h)
+			a.mu.Unlock() // X2-
+		}
+	}(handle)
+
+	pfirst := buffer.Get(16)
+	defer buffer.Put(pfirst)
+	first := *pfirst
+	relocated := false
+	relocSrc := handle
+reloc:
+	if handle <= 0 || handle > maxHandle {
+		return nil, &ErrINVAL{"Allocator.Get: handle out of limits", handle}
+	}
+
+	off := h2off(handle)
+	if err = a.read(first, off); err != nil {
+		return
+	}
+
+	switch tag := first[0]; tag {
+	default:
+		dlen := int(tag)
+		atoms := n2atoms(dlen)
+		switch atoms {
+		case 1:
+			switch tag := first[15]; tag {
+			default:
+				return nil, &ErrILSEQ{Type: ErrTailTag, Off: off, Arg: int64(tag)}
+			case tagNotCompressed:
+				b = need(dlen, buf)
+				copy(b, first[1:])
+				return
+			case tagCompressed:
+				return zappy.Decode(buf, first[1:dlen+1])
+			}
+		default:
+			pcc := buffer.Get(1)
+			defer buffer.Put(pcc)
+			cc := *pcc
+			dlen := int(tag)
+			atoms := n2atoms(dlen)
+			tailOff := off + 16*int64(atoms) - 1
+			if err = a.read(cc, tailOff); err != nil {
+				return
+			}
+
+			switch tag := cc[0]; tag {
+			default:
+				return nil, &ErrILSEQ{Type: ErrTailTag, Off: off, Arg: int64(tag)}
+			case tagNotCompressed:
+				b = need(dlen, buf)
+				off += 1
+				if err = a.read(b, off); err != nil {
+					b = buf[:0]
+				}
+				return
+			case tagCompressed:
+				pzbuf := buffer.Get(dlen)
+				defer buffer.Put(pzbuf)
+				zbuf := *pzbuf
+				off += 1
+				if err = a.read(zbuf, off); err != nil {
+					return buf[:0], err
+				}
+
+				return zappy.Decode(buf, zbuf)
+			}
+		}
+	case 0:
+		return buf[:0], nil
+	case tagUsedLong:
+		pcc := buffer.Get(1)
+		defer buffer.Put(pcc)
+		cc := *pcc
+		dlen := m2n(int(first[1])<<8 | int(first[2]))
+		atoms := n2atoms(dlen)
+		tailOff := off + 16*int64(atoms) - 1
+		if err = a.read(cc, tailOff); err != nil {
+			return
+		}
+
+		switch tag := cc[0]; tag {
+		default:
+			return nil, &ErrILSEQ{Type: ErrTailTag, Off: off, Arg: int64(tag)}
+		case tagNotCompressed:
+			b = need(dlen, buf)
+			off += 3
+			if err = a.read(b, off); err != nil {
+				b = buf[:0]
+			}
+			return
+		case tagCompressed:
+			pzbuf := buffer.Get(dlen)
+			defer buffer.Put(pzbuf)
+			zbuf := *pzbuf
+			off += 3
+			if err = a.read(zbuf, off); err != nil {
+				return buf[:0], err
+			}
+
+			return zappy.Decode(buf, zbuf)
+		}
+	case tagFreeShort, tagFreeLong:
+		return nil, &ErrILSEQ{Type: ErrExpUsedTag, Off: off, Arg: int64(tag)}
+	case tagUsedRelocated:
+		if relocated {
+			return nil, &ErrILSEQ{Type: ErrUnexpReloc, Off: off, Arg: relocSrc}
+		}
+
+		handle = b2h(first[1:])
+		relocated = true
+		goto reloc
+	}
+}
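A minimal usage sketch of the Get contract above (editorial, not part of the vendored commit): it assumes the package's exported Alloc, NewAllocator and Options, which are not shown in this hunk, and reuses one scratch buffer across reads so small blocks avoid extra allocations.

	package main

	import (
		"fmt"
		"log"

		"github.com/cznic/lldb"
	)

	func main() {
		// NewAllocator/Options are assumed from the package's public API
		// (they are not part of this diff hunk).
		f := lldb.NewMemFiler()
		a, err := lldb.NewAllocator(f, &lldb.Options{})
		if err != nil {
			log.Fatal(err)
		}

		h, err := a.Alloc([]byte("hello, lldb"))
		if err != nil {
			log.Fatal(err)
		}

		scratch := make([]byte, 0, 64) // reused across Get calls
		b, err := a.Get(scratch, h)    // b may be a sub-slice of scratch
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf("%s\n", b)
	}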
+
+var reallocTestHook bool
+
+// Realloc sets the content of a block referred to by handle or returns an
+// error, if any.
+//
+// Handle must have been obtained initially from Alloc and must still be
+// valid, otherwise a database may get irreparably corrupted.
+func (a *Allocator) Realloc(handle int64, b []byte) (err error) {
+	if handle <= 0 || handle > maxHandle {
+		return &ErrINVAL{"Realloc: handle out of limits", handle}
+	}
+
+	a.cfree(handle)
+	if err = a.realloc(handle, b); err != nil {
+		return
+	}
+
+	if reallocTestHook {
+		if err = cacheAudit(a.m, &a.lru); err != nil {
+			return
+		}
+	}
+
+	a.cadd(b, handle)
+	return
+}
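A short sketch of the Realloc contract (editorial; the helper name demoRealloc is made up, and the "bytes" and lldb imports plus an already constructed *lldb.Allocator are assumed): the handle stays stable even if the new content forces the block to grow and be relocated internally.

	// demoRealloc is an illustrative helper, not part of the package;
	// assumes imports "bytes" and "github.com/cznic/lldb".
	func demoRealloc(a *lldb.Allocator) error {
		h, err := a.Alloc([]byte("v1"))
		if err != nil {
			return err
		}

		// Growing the content may relocate the block internally,
		// but h keeps referring to the same logical block.
		if err := a.Realloc(h, bytes.Repeat([]byte{'x'}, 4096)); err != nil {
			return err
		}

		got, err := a.Get(nil, h) // same handle, new content
		if err != nil {
			return err
		}
		_ = got
		return nil
	}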
+
+func (a *Allocator) realloc(handle int64, b []byte) (err error) {
+	var dlen, needAtoms0 int
+
+	pb8 := buffer.Get(8)
+	defer buffer.Put(pb8)
+	b8 := *pb8
+	pdst := buffer.Get(zappy.MaxEncodedLen(len(b)))
+	defer buffer.Put(pdst)
+	dst := *pdst
+	b, needAtoms0, cc, err := a.makeUsedBlock(dst, b)
+	if err != nil {
+		return
+	}
+
+	needAtoms := int64(needAtoms0)
+	off := h2off(handle)
+	if err = a.read(b8[:], off); err != nil {
+		return
+	}
+
+	switch tag := b8[0]; tag {
+	default:
+		dlen = int(b8[0])
+	case tagUsedLong:
+		dlen = m2n(int(b8[1])<<8 | int(b8[2]))
+	case tagUsedRelocated:
+		if err = a.free(b2h(b8[1:]), handle, false); err != nil {
+			return err
+		}
+
+		dlen = 0
+	case tagFreeShort, tagFreeLong:
+		return &ErrINVAL{"Allocator.Realloc: invalid handle", handle}
+	}
+
+	atoms := int64(n2atoms(dlen))
+retry:
+	switch {
+	case needAtoms < atoms:
+		// in place shrink
+		if err = a.writeUsedBlock(handle, cc, b); err != nil {
+			return
+		}
+
+		fh, fa := handle+needAtoms, atoms-needAtoms
+		sz, err := a.f.Size()
+		if err != nil {
+			return err
+		}
+
+		if h2off(fh)+16*fa == sz {
+			return a.f.Truncate(h2off(fh))
+		}
+
+		return a.free2(fh, fa)
+	case needAtoms == atoms:
+		// in place replace
+		return a.writeUsedBlock(handle, cc, b)
+	}
+
+	// case needAtoms > atoms:
+	// in place extend or relocate
+	var sz int64
+	if sz, err = a.f.Size(); err != nil {
+		return
+	}
+
+	off = h2off(handle)
+	switch {
+	case off+atoms*16 == sz:
+		// relocating tail block - shortcut
+		return a.writeUsedBlock(handle, cc, b)
+	default:
+		if off+atoms*16 < sz {
+			// handle is not a tail block, check right neighbour
+			rh := handle + atoms
+			rtag, ratoms, p, n, e := a.nfo(rh)
+			if e != nil {
+				return e
+			}
+
+			if rtag == tagFreeShort || rtag == tagFreeLong {
+				// Right neighbour is a free block
+				if needAtoms <= atoms+ratoms {
+					// can expand in place
+					if err = a.unlink(rh, ratoms, p, n); err != nil {
+						return
+					}
+
+					atoms += ratoms
+					goto retry
+
+				}
+			}
+		}
+	}
+
+	if atoms > 1 {
+		if err = a.realloc(handle, nil); err != nil {
+			return
+		}
+	}
+
+	var newH int64
+	if newH, err = a.alloc(b, cc); err != nil {
+		return err
+	}
+
+	prb := buffer.CGet(16)
+	defer buffer.Put(prb)
+	rb := *prb
+	rb[0] = tagUsedRelocated
+	h2b(rb[1:], newH)
+	if err = a.writeAt(rb[:], h2off(handle)); err != nil {
+		return
+	}
+
+	return a.writeUsedBlock(newH, cc, b)
+}
+
+func (a *Allocator) writeAt(b []byte, off int64) (err error) {
+	var n int
+	if n, err = a.f.WriteAt(b, off); err != nil {
+		return
+	}
+
+	if n != len(b) {
+		err = io.ErrShortWrite
+	}
+	return
+}
+
+func (a *Allocator) write(off int64, b ...[]byte) (err error) {
+	rq := 0
+	for _, part := range b {
+		rq += len(part)
+	}
+	pbuf := buffer.Get(rq)
+	defer buffer.Put(pbuf)
+	buf := *pbuf
+	buf = buf[:0]
+	for _, part := range b {
+		buf = append(buf, part...)
+	}
+	return a.writeAt(buf, off)
+}
+
+func (a *Allocator) read(b []byte, off int64) (err error) {
+	var rn int
+	if rn, err = a.f.ReadAt(b, off); rn != len(b) {
+		return &ErrILSEQ{Type: ErrOther, Off: off, More: err}
+	}
+
+	return nil
+}
+
+// nfo returns h's tag. If it's a free block then it also returns the (s)ize
+// (in atoms) and the (p)rev and (n)ext links. If it's a used block then only
+// (s)ize is returned (again in atoms). If it's a used relocated block then
+// (n)ext is set to the relocation target handle.
+func (a *Allocator) nfo(h int64) (tag byte, s, p, n int64, err error) {
+	off := h2off(h)
+	rq := int64(22)
+	sz, err := a.f.Size()
+	if err != nil {
+		return
+	}
+
+	if off+rq >= sz {
+		if rq = sz - off; rq < 15 {
+			err = io.ErrUnexpectedEOF
+			return
+		}
+	}
+
+	pbuf := buffer.Get(22)
+	defer buffer.Put(pbuf)
+	buf := *pbuf
+	if err = a.read(buf[:rq], off); err != nil {
+		return
+	}
+
+	switch tag = buf[0]; tag {
+	default:
+		s = int64(n2atoms(int(tag)))
+	case tagUsedLong:
+		s = int64(n2atoms(m2n(int(buf[1])<<8 | int(buf[2]))))
+	case tagFreeLong:
+		if rq < 22 {
+			err = io.ErrUnexpectedEOF
+			return
+		}
+
+		s, p, n = b2h(buf[1:]), b2h(buf[8:]), b2h(buf[15:])
+	case tagUsedRelocated:
+		s, n = 1, b2h(buf[1:])
+	case tagFreeShort:
+		s, p, n = 1, b2h(buf[1:]), b2h(buf[8:])
+	}
+	return
+}
+
+// leftNfo returns nfo for h's left neighbor if h > 1 and the left neighbor is
+// a free block. Otherwise all zero values are returned instead.
+func (a *Allocator) leftNfo(h int64) (tag byte, s, p, n int64, err error) {
+	if !(h > 1) {
+		return
+	}
+
+	pbuf := buffer.Get(8)
+	defer buffer.Put(pbuf)
+	buf := *pbuf
+	off := h2off(h)
+	if err = a.read(buf[:], off-8); err != nil {
+		return
+	}
+
+	switch tag := buf[7]; tag {
+	case tagFreeShort:
+		return a.nfo(h - 1)
+	case tagFreeLong:
+		return a.nfo(h - b2h(buf[:]))
+	}
+	return
+}
+
+// Set h.prev = p
+func (a *Allocator) prev(h, p int64) (err error) {
+	pb := buffer.Get(7)
+	defer buffer.Put(pb)
+	b := *pb
+	off := h2off(h)
+	if err = a.read(b[:1], off); err != nil {
+		return
+	}
+
+	switch tag := b[0]; tag {
+	default:
+		return &ErrILSEQ{Type: ErrExpFreeTag, Off: off, Arg: int64(tag)}
+	case tagFreeShort:
+		off += 1
+	case tagFreeLong:
+		off += 8
+	}
+	return a.writeAt(h2b(b[:7], p), off)
+}
+
+// Set h.next = n
+func (a *Allocator) next(h, n int64) (err error) {
+	pb := buffer.Get(7)
+	defer buffer.Put(pb)
+	b := *pb
+	off := h2off(h)
+	if err = a.read(b[:1], off); err != nil {
+		return
+	}
+
+	switch tag := b[0]; tag {
+	default:
+		return &ErrILSEQ{Type: ErrExpFreeTag, Off: off, Arg: int64(tag)}
+	case tagFreeShort:
+		off += 8
+	case tagFreeLong:
+		off += 15
+	}
+	return a.writeAt(h2b(b[:7], n), off)
+}
+
+// Make the filer image @h a free block.
+func (a *Allocator) makeFree(h, atoms, prev, next int64) (err error) {
+	pbuf := buffer.Get(22)
+	defer buffer.Put(pbuf)
+	buf := *pbuf
+	switch {
+	case atoms == 1:
+		buf[0], buf[15] = tagFreeShort, tagFreeShort
+		h2b(buf[1:], prev)
+		h2b(buf[8:], next)
+		if err = a.write(h2off(h), buf[:16]); err != nil {
+			return
+		}
+	default:
+
+		buf[0] = tagFreeLong
+		h2b(buf[1:], atoms)
+		h2b(buf[8:], prev)
+		h2b(buf[15:], next)
+		if err = a.write(h2off(h), buf[:22]); err != nil {
+			return
+		}
+
+		h2b(buf[:], atoms)
+		buf[7] = tagFreeLong
+		if err = a.write(h2off(h+atoms)-8, buf[:8]); err != nil {
+			return
+		}
+	}
+	if prev != 0 {
+		if err = a.next(prev, h); err != nil {
+			return
+		}
+	}
+
+	if next != 0 {
+		err = a.prev(next, h)
+	}
+	return
+}
+
+func (a *Allocator) makeUsedBlock(dst []byte, b []byte) (w []byte, rqAtoms int, cc byte, err error) {
+	cc = tagNotCompressed
+	w = b
+
+	var n int
+	if n = len(b); n > maxRq {
+		return nil, 0, 0, &ErrINVAL{"Allocator.makeUsedBlock: content size out of limits", n}
+	}
+
+	rqAtoms = n2atoms(n)
+	if a.Compress && n > 14 { // attempt compression
+		if dst, err = zappy.Encode(dst, b); err != nil {
+			return
+		}
+
+		n2 := len(dst)
+		if rqAtoms2 := n2atoms(n2); rqAtoms2 < rqAtoms { // compression saved at least a single atom
+			w, n, rqAtoms, cc = dst, n2, rqAtoms2, tagCompressed
+		}
+	}
+	return
+}
+
+func (a *Allocator) writeUsedBlock(h int64, cc byte, b []byte) (err error) {
+	n := len(b)
+	rq := n2atoms(n) << 4
+	pbuf := buffer.Get(rq)
+	defer buffer.Put(pbuf)
+	buf := *pbuf
+	switch n <= maxShort {
+	case true:
+		buf[0] = byte(n)
+		copy(buf[1:], b)
+	case false:
+		m := n2m(n)
+		buf[0], buf[1], buf[2] = tagUsedLong, byte(m>>8), byte(m)
+		copy(buf[3:], b)
+	}
+	if p := n2padding(n); p != 0 {
+		copy(buf[rq-1-p:], zeros[:])
+	}
+	buf[rq-1] = cc
+	return a.writeAt(buf, h2off(h))
+}
+
+func (a *Allocator) verifyUnused(h, totalAtoms int64, tag byte, log func(error) bool, fast bool) (atoms, prev, next int64, err error) {
+	switch tag {
+	default:
+		panic("internal error")
+	case tagFreeShort:
+		var b [16]byte
+		off := h2off(h)
+		if err = a.read(b[:], off); err != nil {
+			return
+		}
+
+		if b[15] != tagFreeShort {
+			err = &ErrILSEQ{Type: ErrShortFreeTailTag, Off: off, Arg: int64(b[15])}
+			log(err)
+			return
+		}
+
+		atoms, prev, next = 1, b2h(b[1:]), b2h(b[8:])
+	case tagFreeLong:
+		var b [22]byte
+		off := h2off(h)
+		if err = a.read(b[:], off); err != nil {
+			return
+		}
+
+		atoms, prev, next = b2h(b[1:]), b2h(b[8:]), b2h(b[15:])
+		if fast {
+			return
+		}
+
+		if atoms < 2 {
+			err = &ErrILSEQ{Type: ErrLongFreeBlkTooShort, Off: off, Arg: int64(atoms)}
+			break
+		}
+
+		if h+atoms-1 > totalAtoms {
+			err = &ErrILSEQ{Type: ErrLongFreeBlkTooLong, Off: off, Arg: atoms}
+			break
+		}
+
+		if prev > totalAtoms {
+			err = &ErrILSEQ{Type: ErrLongFreePrevBeyondEOF, Off: off, Arg: next}
+			break
+		}
+
+		if next > totalAtoms {
+			err = &ErrILSEQ{Type: ErrLongFreeNextBeyondEOF, Off: off, Arg: next}
+			break
+		}
+
+		toff := h2off(h+atoms) - 8
+		if err = a.read(b[:8], toff); err != nil {
+			return
+		}
+
+		if b[7] != tag {
+			err = &ErrILSEQ{Type: ErrLongFreeTailTag, Off: off, Arg: int64(b[7])}
+			break
+		}
+
+		if s2 := b2h(b[:]); s2 != atoms {
+			err = &ErrILSEQ{Type: ErrVerifyTailSize, Off: off, Arg: atoms, Arg2: s2}
+			break
+		}
+
+	}
+	if err != nil {
+		log(err)
+	}
+	return
+}
+
+func (a *Allocator) verifyUsed(h, totalAtoms int64, tag byte, buf, ubuf []byte, log func(error) bool, fast bool) (compressed bool, dlen int, atoms, link int64, err error) {
+	var (
+		padding  int
+		doff     int64
+		padZeros [15]byte
+		tailBuf  [16]byte
+	)
+
+	switch tag {
+	default: // Short used
+		dlen = int(tag)
+		atoms = int64((dlen+1)/16) + 1
+		padding = 15 - (dlen+1)%16
+		doff = h2off(h) + 1
+	case tagUsedLong:
+		off := h2off(h) + 1
+		var b2 [2]byte
+		if err = a.read(b2[:], off); err != nil {
+			return
+		}
+
+		dlen = m2n(int(b2[0])<<8 | int(b2[1]))
+		atoms = int64((dlen+3)/16) + 1
+		padding = 15 - (dlen+3)%16
+		doff = h2off(h) + 3
+	case tagUsedRelocated:
+		dlen = 7
+		atoms = 1
+		padding = 7
+		doff = h2off(h) + 1
+	case tagFreeShort, tagFreeLong:
+		panic("internal error")
+	}
+
+	if fast {
+		if tag == tagUsedRelocated {
+			dlen = 0
+			if err = a.read(buf[:7], doff); err != nil {
+				return
+			}
+
+			link = b2h(buf)
+		}
+
+		return false, dlen, atoms, link, nil
+	}
+
+	if ok := h+atoms-1 <= totalAtoms; !ok { // invalid last block
+		err = &ErrILSEQ{Type: ErrVerifyUsedSpan, Off: h2off(h), Arg: atoms}
+		log(err)
+		return
+	}
+
+	tailsz := 1 + padding
+	off := h2off(h) + 16*atoms - int64(tailsz)
+	if err = a.read(tailBuf[:tailsz], off); err != nil {
+		return false, 0, 0, 0, err
+	}
+
+	if ok := bytes.Equal(padZeros[:padding], tailBuf[:padding]); !ok {
+		err = &ErrILSEQ{Type: ErrVerifyPadding, Off: h2off(h)}
+		log(err)
+		return
+	}
+
+	var cc byte
+	switch cc = tailBuf[padding]; cc {
+	default:
+		err = &ErrILSEQ{Type: ErrTailTag, Off: h2off(h)}
+		log(err)
+		return
+	case tagCompressed:
+		compressed = true
+		if tag == tagUsedRelocated {
+			err = &ErrILSEQ{Type: ErrTailTag, Off: h2off(h)}
+			log(err)
+			return
+		}
+
+		fallthrough
+	case tagNotCompressed:
+		if err = a.read(buf[:dlen], doff); err != nil {
+			return false, 0, 0, 0, err
+		}
+	}
+
+	if cc == tagCompressed {
+		if ubuf, err = zappy.Decode(ubuf, buf[:dlen]); err != nil || len(ubuf) > maxRq {
+			err = &ErrILSEQ{Type: ErrDecompress, Off: h2off(h)}
+			log(err)
+			return
+		}
+
+		dlen = len(ubuf)
+	}
+
+	if tag == tagUsedRelocated {
+		link = b2h(buf)
+		if link == 0 {
+			err = &ErrILSEQ{Type: ErrNullReloc, Off: h2off(h)}
+			log(err)
+			return
+		}
+
+		if link > totalAtoms { // invalid last block
+			err = &ErrILSEQ{Type: ErrRelocBeyondEOF, Off: h2off(h), Arg: link}
+			log(err)
+			return
+		}
+	}
+
+	return
+}
+
+var nolog = func(error) bool { return false }
+
+// Verify attempts to find any structural errors in a Filer with respect to
+// its organization as defined by Allocator. 'bitmap' is a scratch pad for
+// necessary bookkeeping and will grow to at most Allocator's
+// Filer.Size()/128 (0.78%).  Any problems found are reported to 'log' except
+// errors unrelated to verification, like disk read failures.  If 'log'
+// returns false or the error makes it impossible to (reliably) continue, the
+// verification process is stopped and an error is returned from the Verify
+// function. Passing a nil log works like providing a log function that
+// always returns false. Any non-structural errors, for instance Filer read
+// errors, are NOT reported to 'log' but returned as Verify's return value,
+// because Verify cannot proceed in such cases.  Verify returns nil only if
+// it fully completed verifying Allocator's Filer without detecting any
+// error.
+//
+// It is recommended to limit the number of reported problems by returning
+// false from 'log' after reaching some limit. A huge, corrupted DB can
+// produce an overwhelming number of reports.
+//
+// The verifying process scans the whole DB at least 3 times (a trade-off
+// between space and time consumed). It doesn't read the content of free
+// blocks beyond the head/tail info bytes. If the 3rd phase detects lost
+// free space, a 4th (faster) scan is performed to precisely report all of
+// it.
+//
+// If the DB/Filer to be verified is reasonably small, i.e. if its size/128
+// fits comfortably within the process's free memory, then it is recommended
+// to consider using a MemFiler for the bit map.
+//
+// Statistics are returned via 'stats' if non nil. The statistics are valid
+// only if Verify succeeded, i.e. it didn't report anything to log and it
+// returned a nil error.
+func (a *Allocator) Verify(bitmap Filer, log func(error) bool, stats *AllocStats) (err error) {
+	if log == nil {
+		log = nolog
+	}
+
+	n, err := bitmap.Size()
+	if err != nil {
+		return
+	}
+
+	if n != 0 {
+		return &ErrINVAL{"Allocator.Verify: bit map initial size non zero (%d)", n}
+	}
+
+	var bits int64
+	bitMask := [8]byte{1, 2, 4, 8, 16, 32, 64, 128}
+	byteBuf := []byte{0}
+
+	//DONE
+	// +performance, this implementation is hopefully correct but _very_
+	// naive, probably good as a prototype only. Use maybe a MemFiler
+	// "cache" etc.
+	// ----
+	// Turns out the OS caching is as effective as it can probably get.
+	bit := func(on bool, h int64) (wasOn bool, err error) {
+		m := bitMask[h&7]
+		off := h >> 3
+		var v byte
+		sz, err := bitmap.Size()
+		if err != nil {
+			return
+		}
+
+		if off < sz {
+			if n, err := bitmap.ReadAt(byteBuf, off); n != 1 {
+				return false, &ErrILSEQ{Type: ErrOther, Off: off, More: fmt.Errorf("Allocator.Verify - reading bitmap: %s", err)}
+			}
+
+			v = byteBuf[0]
+		}
+		switch wasOn = v&m != 0; on {
+		case true:
+			if !wasOn {
+				v |= m
+				bits++
+			}
+		case false:
+			if wasOn {
+				v ^= m
+				bits--
+			}
+		}
+		byteBuf[0] = v
+		if n, err := bitmap.WriteAt(byteBuf, off); n != 1 || err != nil {
+			return false, &ErrILSEQ{Type: ErrOther, Off: off, More: fmt.Errorf("Allocator.Verify - writing bitmap: %s", err)}
+		}
+
+		return
+	}
+
+	// Phase 1 - sequentially scan a.f to reliably determine block
+	// boundaries. Set a bit for every block start.
+	var (
+		buf, ubuf       [maxRq]byte
+		prevH, h, atoms int64
+		wasOn           bool
+		tag             byte
+		st              = AllocStats{
+			AllocMap: map[int64]int64{},
+			FreeMap:  map[int64]int64{},
+		}
+		dlen int
+	)
+
+	fsz, err := a.f.Size()
+	if err != nil {
+		return
+	}
+
+	ok := fsz%16 == 0
+	totalAtoms := (fsz - fltSz) / atomLen
+	if !ok {
+		err = &ErrILSEQ{Type: ErrFileSize, Name: a.f.Name(), Arg: fsz}
+		log(err)
+		return
+	}
+
+	st.TotalAtoms = totalAtoms
+	prevTag := -1
+	lastH := int64(-1)
+
+	for h = 1; h <= totalAtoms; h += atoms {
+		prevH = h // For checking last block == used
+
+		off := h2off(h)
+		if err = a.read(buf[:1], off); err != nil {
+			return
+		}
+
+		switch tag = buf[0]; tag {
+		default: // Short used
+			fallthrough
+		case tagUsedLong, tagUsedRelocated:
+			var compressed bool
+			if compressed, dlen, atoms, _, err = a.verifyUsed(h, totalAtoms, tag, buf[:], ubuf[:], log, false); err != nil {
+				return
+			}
+
+			if compressed {
+				st.Compression++
+			}
+			st.AllocAtoms += atoms
+			switch {
+			case tag == tagUsedRelocated:
+				st.AllocMap[1]++
+				st.Relocations++
+			default:
+				st.AllocMap[atoms]++
+				st.AllocBytes += int64(dlen)
+				st.Handles++
+			}
+		case tagFreeShort, tagFreeLong:
+			if prevTag == tagFreeShort || prevTag == tagFreeLong {
+				err = &ErrILSEQ{Type: ErrAdjacentFree, Off: h2off(lastH), Arg: off}
+				log(err)
+				return
+			}
+
+			if atoms, _, _, err = a.verifyUnused(h, totalAtoms, tag, log, false); err != nil {
+				return
+			}
+
+			st.FreeMap[atoms]++
+			st.FreeAtoms += atoms
+		}
+
+		if wasOn, err = bit(true, h); err != nil {
+			return
+		}
+
+		if wasOn {
+			panic("internal error")
+		}
+
+		prevTag = int(tag)
+		lastH = h
+	}
+
+	if totalAtoms != 0 && (tag == tagFreeShort || tag == tagFreeLong) {
+		err = &ErrILSEQ{Type: ErrFreeTailBlock, Off: h2off(prevH)}
+		log(err)
+		return
+	}
+
+	// Phase 2 - check used blocks, turn off the map bit for every used
+	// block.
+	for h = 1; h <= totalAtoms; h += atoms {
+		off := h2off(h)
+		if err = a.read(buf[:1], off); err != nil {
+			return
+		}
+
+		var link int64
+		switch tag = buf[0]; tag {
+		default: // Short used
+			fallthrough
+		case tagUsedLong, tagUsedRelocated:
+			if _, _, atoms, link, err = a.verifyUsed(h, totalAtoms, tag, buf[:], ubuf[:], log, true); err != nil {
+				return
+			}
+		case tagFreeShort, tagFreeLong:
+			if atoms, _, _, err = a.verifyUnused(h, totalAtoms, tag, log, true); err != nil {
+				return
+			}
+		}
+
+		turnoff := true
+		switch tag {
+		case tagUsedRelocated:
+			if err = a.read(buf[:1], h2off(link)); err != nil {
+				return
+			}
+
+			switch linkedTag := buf[0]; linkedTag {
+			case tagFreeShort, tagFreeLong, tagUsedRelocated:
+				err = &ErrILSEQ{Type: ErrInvalidRelocTarget, Off: off, Arg: link}
+				log(err)
+				return
+			}
+
+		case tagFreeShort, tagFreeLong:
+			turnoff = false
+		}
+
+		if !turnoff {
+			continue
+		}
+
+		if wasOn, err = bit(false, h); err != nil {
+			return
+		}
+
+		if !wasOn {
+			panic("internal error")
+		}
+
+	}
+
+	// Phase 3 - using the flt check heads link to proper free blocks.  For
+	// every free block, walk the list, verify the {next, prev} links and
+	// turn the respective map bit off. After processing all free lists,
+	// the map bits count should be zero. Otherwise there are "lost" free
+	// blocks.
+
+	var prev, next, fprev, fnext int64
+	rep := a.flt
+
+	for _, list := range rep {
+		prev, next = 0, list.head
+		for ; next != 0; prev, next = next, fnext {
+			if wasOn, err = bit(false, next); err != nil {
+				return
+			}
+
+			if !wasOn {
+				err = &ErrILSEQ{Type: ErrFLT, Off: h2off(next), Arg: h}
+				log(err)
+				return
+			}
+
+			off := h2off(next)
+			if err = a.read(buf[:1], off); err != nil {
+				return
+			}
+
+			switch tag = buf[0]; tag {
+			default:
+				panic("internal error")
+			case tagFreeShort, tagFreeLong:
+				if atoms, fprev, fnext, err = a.verifyUnused(next, totalAtoms, tag, log, true); err != nil {
+					return
+				}
+
+				if min := list.minSize; atoms < min {
+					err = &ErrILSEQ{Type: ErrFLTSize, Off: h2off(next), Arg: atoms, Arg2: min}
+					log(err)
+					return
+				}
+
+				if fprev != prev {
+					err = &ErrILSEQ{Type: ErrFreeChaining, Off: h2off(next)}
+					log(err)
+					return
+				}
+			}
+		}
+
+	}
+
+	if bits == 0 { // Verify succeeded
+		if stats != nil {
+			*stats = st
+		}
+		return
+	}
+
+	// Phase 4 - if after phase 3 there are lost free blocks, report all of
+	// them to 'log'
+	for i := range ubuf { // setup zeros for compares
+		ubuf[i] = 0
+	}
+
+	var off, lh int64
+	rem, err := bitmap.Size()
+	if err != nil {
+		return err
+	}
+
+	for rem != 0 {
+		rq := int(mathutil.MinInt64(64*1024, rem))
+		var n int
+		if n, err = bitmap.ReadAt(buf[:rq], off); n != rq {
+			return &ErrILSEQ{Type: ErrOther, Off: off, More: fmt.Errorf("bitmap ReadAt(size %d, off %#x): %s", rq, off, err)}
+		}
+
+		if !bytes.Equal(buf[:rq], ubuf[:rq]) {
+			for d, v := range buf[:rq] {
+				if v != 0 {
+					for i, m := range bitMask {
+						if v&m != 0 {
+							lh = 8*(off+int64(d)) + int64(i)
+							err = &ErrILSEQ{Type: ErrLostFreeBlock, Off: h2off(lh)}
+							log(err)
+							return
+						}
+					}
+				}
+			}
+		}
+
+		off += int64(rq)
+		rem -= int64(rq)
+	}
+
+	return
+}
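The recommendations above — bound the number of problems reported through 'log' and use a MemFiler for the bitmap when it fits in memory — look roughly like the following sketch (editorial; the helper name verifyDB is made up, and the "fmt" and lldb imports plus an already opened *lldb.Allocator are assumed).

	// verifyDB is an illustrative helper, not part of the package.
	func verifyDB(a *lldb.Allocator) error {
		var stats lldb.AllocStats
		reported := 0
		logf := func(err error) bool {
			fmt.Println("verify:", err)
			reported++
			return reported < 10 // stop after 10 reported problems
		}
		return a.Verify(lldb.NewMemFiler(), logf, &stats)
	}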
+
+type fltSlot struct {
+	head    int64
+	minSize int64
+}
+
+func (f fltSlot) String() string {
+	return fmt.Sprintf("head %#x, minSize %#x\n", f.head, f.minSize)
+}
+
+type flt [14]fltSlot
+
+func (f *flt) init() {
+	sz := 1
+	for i := range *f {
+		f[i].minSize, f[i].head = int64(sz), 0
+		sz <<= 1
+	}
+	f[13].minSize = 4112
+}
+
+func (f *flt) load(fi Filer, off int64) (err error) {
+	pb := buffer.Get(fltSz)
+	defer buffer.Put(pb)
+	b := *pb
+	if _, err = fi.ReadAt(b[:], off); err != nil {
+		return
+	}
+
+	for i := range *f {
+		off := 8*i + 1
+		f[i].head = b2h(b[off:])
+	}
+	return
+}
+
+func (f *flt) find(rq int) (h int64) {
+	switch {
+	case rq < 1:
+		panic(rq)
+	case rq >= maxFLTRq:
+		h, f[13].head = f[13].head, 0
+		return
+	default:
+		g := f[mathutil.Log2Uint16(uint16(rq)):]
+		for i := range g {
+			p := &g[i]
+			if rq <= int(p.minSize) {
+				if h = p.head; h != 0 {
+					p.head = 0
+					return
+				}
+			}
+		}
+		return
+	}
+}
+
+func (f *flt) head(atoms int64) (h int64) {
+	switch {
+	case atoms < 1:
+		panic(atoms)
+	case atoms >= maxFLTRq:
+		return f[13].head
+	default:
+		lg := mathutil.Log2Uint16(uint16(atoms))
+		g := f[lg:]
+		for i := range g {
+			if atoms < g[i+1].minSize {
+				return g[i].head
+			}
+		}
+		panic("internal error")
+	}
+}
+
+func (f *flt) setHead(h, atoms int64, fi Filer) (err error) {
+	switch {
+	case atoms < 1:
+		panic(atoms)
+	case atoms >= maxFLTRq:
+		pb := buffer.Get(7)
+		defer buffer.Put(pb)
+		b := *pb
+		if _, err = fi.WriteAt(h2b(b[:], h), 8*13+1); err != nil {
+			return
+		}
+
+		f[13].head = h
+		return
+	default:
+		lg := mathutil.Log2Uint16(uint16(atoms))
+		g := f[lg:]
+		for i := range f {
+			if atoms < g[i+1].minSize {
+				pb := buffer.Get(7)
+				defer buffer.Put(pb)
+				b := *pb
+				if _, err = fi.WriteAt(h2b(b[:], h), 8*int64(i+lg)+1); err != nil {
+					return
+				}
+
+				g[i].head = h
+				return
+			}
+		}
+		panic("internal error")
+	}
+}
+
+func (f *flt) String() string {
+	a := []string{}
+	for i, v := range *f {
+		a = append(a, fmt.Sprintf("[%2d] %s", i, v))
+	}
+	return strings.Join(a, "")
+}
+
+type node struct {
+	b          []byte
+	h          int64
+	prev, next *node
+}
+
+type cache []*node
+
+func (c *cache) get(n int) *node {
+	r, _ := c.get2(n)
+	return r
+}
+
+func (c *cache) get2(n int) (r *node, isZeroed bool) {
+	s := *c
+	lens := len(s)
+	if lens == 0 {
+		return &node{b: make([]byte, n, mathutil.Min(2*n, maxBuf))}, true
+	}
+
+	i := sort.Search(lens, func(x int) bool { return len(s[x].b) >= n })
+	if i == lens {
+		i--
+		s[i].b, isZeroed = make([]byte, n, mathutil.Min(2*n, maxBuf)), true
+	}
+
+	r = s[i]
+	r.b = r.b[:n]
+	copy(s[i:], s[i+1:])
+	s = s[:lens-1]
+	*c = s
+	return
+}
+
+func (c *cache) cget(n int) (r *node) {
+	r, ok := c.get2(n)
+	if ok {
+		return
+	}
+
+	for i := range r.b {
+		r.b[i] = 0
+	}
+	return
+}
+
+func (c *cache) size() (sz int64) {
+	for _, n := range *c {
+		sz += int64(cap(n.b))
+	}
+	return
+}
+
+func (c *cache) put(n *node) *node {
+	s := *c
+	n.b = n.b[:cap(n.b)]
+	lenb := len(n.b)
+	lens := len(s)
+	i := sort.Search(lens, func(x int) bool { return len(s[x].b) >= lenb })
+	s = append(s, nil)
+	copy(s[i+1:], s[i:])
+	s[i] = n
+	*c = s
+	return n
+}
+
+type lst struct {
+	front, back *node
+}
+
+func (l *lst) pushFront(n *node) *node {
+	if l.front == nil {
+		l.front, l.back, n.prev, n.next = n, n, nil, nil
+		return n
+	}
+
+	n.prev, n.next, l.front.prev, l.front = nil, l.front, n, n
+	return n
+}
+
+func (l *lst) remove(n *node) *node {
+	if n.prev == nil {
+		l.front = n.next
+	} else {
+		n.prev.next = n.next
+	}
+	if n.next == nil {
+		l.back = n.prev
+	} else {
+		n.next.prev = n.prev
+	}
+	n.prev, n.next = nil, nil
+	return n
+}
+
+func (l *lst) removeBack() *node {
+	return l.remove(l.back)
+}
+
+func (l *lst) moveToFront(n *node) *node {
+	return l.pushFront(l.remove(n))
+}
+
+func (l *lst) size() (sz int64) {
+	for n := l.front; n != nil; n = n.next {
+		sz += int64(cap(n.b))
+	}
+	return
+}
+
+func cacheAudit(m map[int64]*node, l *lst) (err error) {
+	cnt := 0
+	for h, n := range m {
+		if g, e := n.h, h; g != e {
+			return fmt.Errorf("cacheAudit: invalid node handle %d != %d", g, e)
+		}
+
+		if cnt, err = l.audit(n, true); err != nil {
+			return
+		}
+	}
+
+	if g, e := cnt, len(m); g != e {
+		return fmt.Errorf("cacheAudit: invalid cache size %d != %d", g, e)
+	}
+
+	return
+}
+
+func (l *lst) audit(n *node, onList bool) (cnt int, err error) {
+	if !onList && (n.prev != nil || n.next != nil) {
+		return -1, fmt.Errorf("lst.audit: free node with non nil linkage")
+	}
+
+	if l.front == nil && l.back != nil || l.back == nil && l.front != nil {
+		return -1, fmt.Errorf("lst.audit: one of .front/.back is nil while the other is non nil")
+	}
+
+	if l.front == l.back && l.front != nil {
+		x := l.front
+		if x.prev != nil || x.next != nil {
+			return -1, fmt.Errorf("lst.audit: single node has non nil linkage")
+		}
+
+		if onList && x != n {
+			return -1, fmt.Errorf("lst.audit: single node is alien")
+		}
+	}
+
+	seen := false
+	var prev *node
+	x := l.front
+	for x != nil {
+		cnt++
+		if x.prev != prev {
+			return -1, fmt.Errorf("lst.audit: broken .prev linkage")
+		}
+
+		if x == n {
+			seen = true
+		}
+
+		prev = x
+		x = x.next
+	}
+
+	if prev != l.back {
+		return -1, fmt.Errorf("lst.audit: broken .back linkage")
+	}
+
+	if onList && !seen {
+		return -1, fmt.Errorf("lst.audit: node missing in list")
+	}
+
+	if !onList && seen {
+		return -1, fmt.Errorf("lst.audit: node should not be on the list")
+	}
+
+	return
+}

+ 192 - 0
vendor/github.com/cznic/lldb/filer.go

@@ -0,0 +1,192 @@
+// Copyright 2014 The lldb Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// An abstraction of file like (persistent) storage with optional (abstracted)
+// support for structural integrity.
+
+package lldb
+
+import (
+	"fmt"
+
+	"github.com/cznic/mathutil"
+)
+
+func doubleTrouble(first, second error) error {
+	return fmt.Errorf("%q. Additionally, while attempting to recover (rollback): %q", first, second)
+}
+
+// A Filer is a []byte-like model of a file or similar entity. It may
+// optionally implement support for structural transaction safety. In contrast
+// to a file stream, a Filer is not sequentially accessible. ReadAt and WriteAt
+// are always "addressed" by an offset and are assumed to perform atomically.
+// A Filer is not safe for concurrent access; it's designed for consumption by
+// the other objects in the package, which should use a Filer from one
+// goroutine only or via a mutex. BeginUpdate, EndUpdate and Rollback must
+// either all be implemented by a Filer for structural integrity, or they must
+// all be no-ops where/if that requirement is relaxed.
+//
+// If a Filer wraps another Filer implementation, it usually invokes the same
+// methods on the "inner" one, after some possible argument translations etc.
+// If a Filer implements the structural transaction handling methods
+// (BeginUpdate, EndUpdate and Rollback) as no-ops _and_ wraps another Filer,
+// it still MUST invoke those methods on the inner Filer. This is important
+// for the case where a RollbackFiler exists somewhere down the chain.  It's
+// also important for an Allocator, which needs to know when it must
+// invalidate its FLT cache.
+type Filer interface {
+	// BeginUpdate increments the "nesting" counter (initially zero). Every
+	// call to BeginUpdate must be eventually "balanced" by exactly one of
+	// EndUpdate or Rollback. Calls to BeginUpdate may nest.
+	BeginUpdate() error
+
+	// Analogous to os.File.Close().
+	Close() error
+
+	// EndUpdate decrements the "nesting" counter. If it's zero after that
+	// then assume the "storage" has reached structural integrity (after a
+	// batch of partial updates). If a Filer implements some support for
+	// that (write ahead log, journal, etc.) then the appropriate actions
+	// are to be taken for nesting == 0. Invocation of an unbalanced
+	// EndUpdate is an error.
+	EndUpdate() error
+
+	// Analogous to os.File.Name().
+	Name() string
+
+	// PunchHole deallocates space inside a "file" in the byte range
+	// starting at off and continuing for size bytes. The actual hole
+	// created by PunchHole may be smaller than requested. The Filer size
+	// (as reported by `Size()`) does not change when hole punching, even
+	// when the hole extends to the end of the file.  In contrast to the
+	// Linux implementation of FALLOC_FL_PUNCH_HOLE in `fallocate`(2), a
+	// Filer is free not only to ignore `PunchHole()` (implement it as a
+	// nop), but additionally no guarantees are required about the content
+	// of the hole when it is eventually read back, i.e. any data, not only
+	// zeros, can be read from the "hole", including whatever was left
+	// there - with all of the possible security problems.
+	PunchHole(off, size int64) error
+
+	// As os.File.ReadAt. Note: `off` is an absolute "file pointer"
+	// address and cannot be negative even when a Filer is an InnerFiler.
+	ReadAt(b []byte, off int64) (n int, err error)
+
+	// Rollback cancels and undoes the innermost pending update level.
+	// Rollback decrements the "nesting" counter.  If a Filer implements
+	// some support for keeping structural integrity (write ahead log,
+	// journal, etc.) then the appropriate actions are to be taken.
+	// Invocation of an unbalanced Rollback is an error.
+	Rollback() error
+
+	// Analogous to os.File.FileInfo().Size().
+	Size() (int64, error)
+
+	// Analogous to os.Sync().
+	Sync() (err error)
+
+	// Analogous to os.File.Truncate().
+	Truncate(size int64) error
+
+	// Analogous to os.File.WriteAt(). Note: `off` is an absolute "file
+	// pointer" address and cannot be negative even when a Filer is an
+	// InnerFiler.
+	WriteAt(b []byte, off int64) (n int, err error)
+}
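A sketch of the nesting contract (editorial; the helper name update is made up, only the lldb import is assumed, and the rollback error is deliberately ignored for brevity): every BeginUpdate is balanced by exactly one EndUpdate or Rollback, and structural integrity is assumed only when the outermost EndUpdate brings the counter back to zero.

	// update is an illustrative helper, not part of the package.
	func update(f lldb.Filer, apply func() error) error {
		if err := f.BeginUpdate(); err != nil {
			return err
		}

		if err := apply(); err != nil {
			f.Rollback() // undo the innermost pending update level; error ignored for brevity
			return err
		}

		// The nesting counter reaches zero here only if this was the
		// outermost update.
		return f.EndUpdate()
	}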
+
+var _ Filer = &InnerFiler{} // Ensure InnerFiler is a Filer.
+
+// An InnerFiler is a Filer with added addressing/size translation.
+type InnerFiler struct {
+	outer Filer
+	off   int64
+}
+
+// NewInnerFiler returns a new InnerFiler backed by `outer`, in a way which
+// adds `off` to every access.
+//
+// For example, considering:
+//
+// 	inner := NewInnerFiler(outer, 10)
+//
+// then
+//
+// 	inner.WriteAt([]byte{42}, 4)
+//
+// translates to
+//
+// 	outer.WriteAt([]byte{42}, 14)
+//
+// But an attempt to emulate
+//
+// 	outer.WriteAt([]byte{17}, 9)
+//
+// by
+//
+// 	inner.WriteAt([]byte{17}, -1)
+//
+// will fail as the `off` parameter can never be < 0. Also note that
+//
+// 	inner.Size() == outer.Size() - off,
+//
+// i.e. `inner` pretends no `outer` exists. Finally, after e.g.
+//
+// 	inner.Truncate(7)
+// 	outer.Size() == 17
+//
+// will be true.
+func NewInnerFiler(outer Filer, off int64) *InnerFiler { return &InnerFiler{outer, off} }
+
+// BeginUpdate implements Filer.
+func (f *InnerFiler) BeginUpdate() error { return f.outer.BeginUpdate() }
+
+// Close implements Filer.
+func (f *InnerFiler) Close() (err error) { return f.outer.Close() }
+
+// EndUpdate implements Filer.
+func (f *InnerFiler) EndUpdate() error { return f.outer.EndUpdate() }
+
+// Name implements Filer.
+func (f *InnerFiler) Name() string { return f.outer.Name() }
+
+// PunchHole implements Filer. `off`, `size` must be >= 0.
+func (f *InnerFiler) PunchHole(off, size int64) error { return f.outer.PunchHole(f.off+off, size) }
+
+// ReadAt implements Filer. `off` must be >= 0.
+func (f *InnerFiler) ReadAt(b []byte, off int64) (n int, err error) {
+	if off < 0 {
+		return 0, &ErrINVAL{f.outer.Name() + ":ReadAt invalid off", off}
+	}
+
+	return f.outer.ReadAt(b, f.off+off)
+}
+
+// Rollback implements Filer.
+func (f *InnerFiler) Rollback() error { return f.outer.Rollback() }
+
+// Size implements Filer.
+func (f *InnerFiler) Size() (int64, error) {
+	sz, err := f.outer.Size()
+	if err != nil {
+		return 0, err
+	}
+
+	return mathutil.MaxInt64(sz-f.off, 0), nil
+}
+
+// Sync() implements Filer.
+func (f *InnerFiler) Sync() (err error) {
+	return f.outer.Sync()
+}
+
+// Truncate implements Filer.
+func (f *InnerFiler) Truncate(size int64) error { return f.outer.Truncate(size + f.off) }
+
+// WriteAt implements Filer. `off` must be >= 0.
+func (f *InnerFiler) WriteAt(b []byte, off int64) (n int, err error) {
+	if off < 0 {
+		return 0, &ErrINVAL{f.outer.Name() + ":WriteAt invalid off", off}
+	}
+
+	return f.outer.WriteAt(b, f.off+off)
+}
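The offset translation spelled out in the NewInnerFiler documentation can be exercised directly; a sketch (editorial, assuming the "fmt", "log" and lldb imports, and MemFiler, which is added later in this same commit):

	outer := lldb.NewMemFiler()
	inner := lldb.NewInnerFiler(outer, 10)

	// Writing at inner offset 4 lands at outer offset 14.
	if _, err := inner.WriteAt([]byte{42}, 4); err != nil {
		log.Fatal(err)
	}

	var b [1]byte
	if _, err := outer.ReadAt(b[:], 14); err != nil {
		log.Fatal(err)
	}
	fmt.Println(b[0]) // 42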

+ 812 - 0
vendor/github.com/cznic/lldb/gb.go

@@ -0,0 +1,812 @@
+// Copyright 2014 The lldb Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Utilities to encode/decode and collate Go predeclared scalar types (and the
+// typeless nil and []byte).  The encoding format is a variation of the one
+// used by the "encoding/gob" package.
+
+package lldb
+
+import (
+	"bytes"
+	"fmt"
+	"math"
+
+	"github.com/cznic/mathutil"
+)
+
+const (
+	gbNull     = iota // 0x00
+	gbFalse           // 0x01
+	gbTrue            // 0x02
+	gbFloat0          // 0x03
+	gbFloat1          // 0x04
+	gbFloat2          // 0x05
+	gbFloat3          // 0x06
+	gbFloat4          // 0x07
+	gbFloat5          // 0x08
+	gbFloat6          // 0x09
+	gbFloat7          // 0x0a
+	gbFloat8          // 0x0b
+	gbComplex0        // 0x0c
+	gbComplex1        // 0x0d
+	gbComplex2        // 0x0e
+	gbComplex3        // 0x0f
+	gbComplex4        // 0x10
+	gbComplex5        // 0x11
+	gbComplex6        // 0x12
+	gbComplex7        // 0x13
+	gbComplex8        // 0x14
+	gbBytes00         // 0x15
+	gbBytes01         // 0x16
+	gbBytes02         // 0x17
+	gbBytes03         // 0x18
+	gbBytes04         // 0x19
+	gbBytes05         // 0x1a
+	gbBytes06         // 0x1b
+	gbBytes07         // 0x1c
+	gbBytes08         // 0x1d
+	gbBytes09         // 0x1e
+	gbBytes10         // 0x1f
+	gbBytes11         // 0x20
+	gbBytes12         // 0x21
+	gbBytes13         // 0x22
+	gbBytes14         // 0x23
+	gbBytes15         // 0x24
+	gbBytes16         // 0x25
+	gbBytes17         // 0x26
+	gbBytes1          // 0x27
+	gbBytes2          // 0x28: Offset by one to allow 64kB sized []byte.
+	gbString00        // 0x29
+	gbString01        // 0x2a
+	gbString02        // 0x2b
+	gbString03        // 0x2c
+	gbString04        // 0x2d
+	gbString05        // 0x2e
+	gbString06        // 0x2f
+	gbString07        // 0x30
+	gbString08        // 0x31
+	gbString09        // 0x32
+	gbString10        // 0x33
+	gbString11        // 0x34
+	gbString12        // 0x35
+	gbString13        // 0x36
+	gbString14        // 0x37
+	gbString15        // 0x38
+	gbString16        // 0x39
+	gbString17        // 0x3a
+	gbString1         // 0x3b
+	gbString2         // 0x3c
+	gbUintP1          // 0x3d
+	gbUintP2          // 0x3e
+	gbUintP3          // 0x3f
+	gbUintP4          // 0x40
+	gbUintP5          // 0x41
+	gbUintP6          // 0x42
+	gbUintP7          // 0x43
+	gbUintP8          // 0x44
+	gbIntM8           // 0x45
+	gbIntM7           // 0x46
+	gbIntM6           // 0x47
+	gbIntM5           // 0x48
+	gbIntM4           // 0x49
+	gbIntM3           // 0x4a
+	gbIntM2           // 0x4b
+	gbIntM1           // 0x4c
+	gbIntP1           // 0x4d
+	gbIntP2           // 0x4e
+	gbIntP3           // 0x4f
+	gbIntP4           // 0x50
+	gbIntP5           // 0x51
+	gbIntP6           // 0x52
+	gbIntP7           // 0x53
+	gbIntP8           // 0x54
+	gbInt0            // 0x55
+
+	gbIntMax = 255 - gbInt0 // 0xff == 170
+)
+
+// EncodeScalars encodes a vector of predeclared scalar type values to a
+// []byte, making it suitable to store it as a "record" in a DB or to use it as
+// a key of a BTree.
+func EncodeScalars(scalars ...interface{}) (b []byte, err error) {
+	for _, scalar := range scalars {
+		switch x := scalar.(type) {
+		default:
+			return nil, &ErrINVAL{"EncodeScalars: unsupported type", fmt.Sprintf("%T in `%#v`", x, scalars)}
+
+		case nil:
+			b = append(b, gbNull)
+
+		case bool:
+			switch x {
+			case false:
+				b = append(b, gbFalse)
+			case true:
+				b = append(b, gbTrue)
+			}
+
+		case float32:
+			encFloat(float64(x), &b)
+		case float64:
+			encFloat(x, &b)
+
+		case complex64:
+			encComplex(complex128(x), &b)
+		case complex128:
+			encComplex(x, &b)
+
+		case string:
+			n := len(x)
+			if n <= 17 {
+				b = append(b, byte(gbString00+n))
+				b = append(b, []byte(x)...)
+				break
+			}
+
+			if n > 65535 {
+				return nil, fmt.Errorf("EncodeScalars: cannot encode string of length %d (limit 65535)", n)
+			}
+
+			pref := byte(gbString1)
+			if n > 255 {
+				pref++
+			}
+			b = append(b, pref)
+			encUint0(uint64(n), &b)
+			b = append(b, []byte(x)...)
+
+		case int8:
+			encInt(int64(x), &b)
+		case int16:
+			encInt(int64(x), &b)
+		case int32:
+			encInt(int64(x), &b)
+		case int64:
+			encInt(x, &b)
+		case int:
+			encInt(int64(x), &b)
+
+		case uint8:
+			encUint(uint64(x), &b)
+		case uint16:
+			encUint(uint64(x), &b)
+		case uint32:
+			encUint(uint64(x), &b)
+		case uint64:
+			encUint(x, &b)
+		case uint:
+			encUint(uint64(x), &b)
+		case []byte:
+			n := len(x)
+			if n <= 17 {
+				b = append(b, byte(gbBytes00+n))
+				b = append(b, []byte(x)...)
+				break
+			}
+
+			if n > 65536 {
+				return nil, fmt.Errorf("EncodeScalars: cannot encode []byte of length %d (limit 65536)", n)
+			}
+
+			pref := byte(gbBytes1)
+			if n > 255 {
+				pref++
+			}
+			b = append(b, pref)
+			if n <= 255 {
+				b = append(b, byte(n))
+			} else {
+				n--
+				b = append(b, byte(n>>8), byte(n))
+			}
+			b = append(b, x...)
+		}
+	}
+	return
+}
+
+func encComplex(f complex128, b *[]byte) {
+	encFloatPrefix(gbComplex0, real(f), b)
+	encFloatPrefix(gbComplex0, imag(f), b)
+}
+
+func encFloatPrefix(prefix byte, f float64, b *[]byte) {
+	u := math.Float64bits(f)
+	var n uint64
+	for i := 0; i < 8; i++ {
+		n <<= 8
+		n |= u & 0xFF
+		u >>= 8
+	}
+	bits := mathutil.BitLenUint64(n)
+	if bits == 0 {
+		*b = append(*b, prefix)
+		return
+	}
+
+	// 0 1 2 3 4 5 6 7 8 9
+	// . 1 1 1 1 1 1 1 1 2
+	encUintPrefix(prefix+1+byte((bits-1)>>3), n, b)
+}
+
+func encFloat(f float64, b *[]byte) {
+	encFloatPrefix(gbFloat0, f, b)
+}
+
+func encUint0(n uint64, b *[]byte) {
+	switch {
+	case n <= 0xff:
+		*b = append(*b, byte(n))
+	case n <= 0xffff:
+		*b = append(*b, byte(n>>8), byte(n))
+	case n <= 0xffffff:
+		*b = append(*b, byte(n>>16), byte(n>>8), byte(n))
+	case n <= 0xffffffff:
+		*b = append(*b, byte(n>>24), byte(n>>16), byte(n>>8), byte(n))
+	case n <= 0xffffffffff:
+		*b = append(*b, byte(n>>32), byte(n>>24), byte(n>>16), byte(n>>8), byte(n))
+	case n <= 0xffffffffffff:
+		*b = append(*b, byte(n>>40), byte(n>>32), byte(n>>24), byte(n>>16), byte(n>>8), byte(n))
+	case n <= 0xffffffffffffff:
+		*b = append(*b, byte(n>>48), byte(n>>40), byte(n>>32), byte(n>>24), byte(n>>16), byte(n>>8), byte(n))
+	case n <= math.MaxUint64:
+		*b = append(*b, byte(n>>56), byte(n>>48), byte(n>>40), byte(n>>32), byte(n>>24), byte(n>>16), byte(n>>8), byte(n))
+	}
+}
+
+func encUintPrefix(prefix byte, n uint64, b *[]byte) {
+	*b = append(*b, prefix)
+	encUint0(n, b)
+}
+
+func encUint(n uint64, b *[]byte) {
+	bits := mathutil.Max(1, mathutil.BitLenUint64(n))
+	encUintPrefix(gbUintP1+byte((bits-1)>>3), n, b)
+}
+
+func encInt(n int64, b *[]byte) {
+	switch {
+	case n < -0x100000000000000:
+		*b = append(*b, byte(gbIntM8), byte(n>>56), byte(n>>48), byte(n>>40), byte(n>>32), byte(n>>24), byte(n>>16), byte(n>>8), byte(n))
+	case n < -0x1000000000000:
+		*b = append(*b, byte(gbIntM7), byte(n>>48), byte(n>>40), byte(n>>32), byte(n>>24), byte(n>>16), byte(n>>8), byte(n))
+	case n < -0x10000000000:
+		*b = append(*b, byte(gbIntM6), byte(n>>40), byte(n>>32), byte(n>>24), byte(n>>16), byte(n>>8), byte(n))
+	case n < -0x100000000:
+		*b = append(*b, byte(gbIntM5), byte(n>>32), byte(n>>24), byte(n>>16), byte(n>>8), byte(n))
+	case n < -0x1000000:
+		*b = append(*b, byte(gbIntM4), byte(n>>24), byte(n>>16), byte(n>>8), byte(n))
+	case n < -0x10000:
+		*b = append(*b, byte(gbIntM3), byte(n>>16), byte(n>>8), byte(n))
+	case n < -0x100:
+		*b = append(*b, byte(gbIntM2), byte(n>>8), byte(n))
+	case n < 0:
+		*b = append(*b, byte(gbIntM1), byte(n))
+	case n <= gbIntMax:
+		*b = append(*b, byte(gbInt0+n))
+	case n <= 0xff:
+		*b = append(*b, gbIntP1, byte(n))
+	case n <= 0xffff:
+		*b = append(*b, gbIntP2, byte(n>>8), byte(n))
+	case n <= 0xffffff:
+		*b = append(*b, gbIntP3, byte(n>>16), byte(n>>8), byte(n))
+	case n <= 0xffffffff:
+		*b = append(*b, gbIntP4, byte(n>>24), byte(n>>16), byte(n>>8), byte(n))
+	case n <= 0xffffffffff:
+		*b = append(*b, gbIntP5, byte(n>>32), byte(n>>24), byte(n>>16), byte(n>>8), byte(n))
+	case n <= 0xffffffffffff:
+		*b = append(*b, gbIntP6, byte(n>>40), byte(n>>32), byte(n>>24), byte(n>>16), byte(n>>8), byte(n))
+	case n <= 0xffffffffffffff:
+		*b = append(*b, gbIntP7, byte(n>>48), byte(n>>40), byte(n>>32), byte(n>>24), byte(n>>16), byte(n>>8), byte(n))
+	case n <= 0x7fffffffffffffff:
+		*b = append(*b, gbIntP8, byte(n>>56), byte(n>>48), byte(n>>40), byte(n>>32), byte(n>>24), byte(n>>16), byte(n>>8), byte(n))
+	}
+}
+
+func decodeFloat(b []byte) float64 {
+	var u uint64
+	for i, v := range b {
+		u |= uint64(v) << uint((i+8-len(b))*8)
+	}
+	return math.Float64frombits(u)
+}
+
+// DecodeScalars decodes a []byte produced by EncodeScalars.
+func DecodeScalars(b []byte) (scalars []interface{}, err error) {
+	b0 := b
+	for len(b) != 0 {
+		switch tag := b[0]; tag {
+		//default:
+		//return nil, fmt.Errorf("tag %d(%#x) not supported", b[0], b[0])
+		case gbNull:
+			scalars = append(scalars, nil)
+			b = b[1:]
+		case gbFalse:
+			scalars = append(scalars, false)
+			b = b[1:]
+		case gbTrue:
+			scalars = append(scalars, true)
+			b = b[1:]
+		case gbFloat0:
+			scalars = append(scalars, 0.0)
+			b = b[1:]
+		case gbFloat1, gbFloat2, gbFloat3, gbFloat4, gbFloat5, gbFloat6, gbFloat7, gbFloat8:
+			n := 1 + int(tag) - gbFloat0
+			if len(b) < n-1 {
+				goto corrupted
+			}
+
+			scalars = append(scalars, decodeFloat(b[1:n]))
+			b = b[n:]
+		case gbComplex0, gbComplex1, gbComplex2, gbComplex3, gbComplex4, gbComplex5, gbComplex6, gbComplex7, gbComplex8:
+			n := 1 + int(tag) - gbComplex0
+			if len(b) < n-1 {
+				goto corrupted
+			}
+
+			re := decodeFloat(b[1:n])
+			b = b[n:]
+
+			if len(b) == 0 {
+				goto corrupted
+			}
+
+			tag = b[0]
+			if tag < gbComplex0 || tag > gbComplex8 {
+				goto corrupted
+			}
+
+			n = 1 + int(tag) - gbComplex0
+			if len(b) < n-1 {
+				goto corrupted
+			}
+
+			scalars = append(scalars, complex(re, decodeFloat(b[1:n])))
+			b = b[n:]
+		case gbBytes00, gbBytes01, gbBytes02, gbBytes03, gbBytes04,
+			gbBytes05, gbBytes06, gbBytes07, gbBytes08, gbBytes09,
+			gbBytes10, gbBytes11, gbBytes12, gbBytes13, gbBytes14,
+			gbBytes15, gbBytes16, gbBytes17:
+			n := int(tag - gbBytes00)
+			if len(b) < n+1 {
+				goto corrupted
+			}
+
+			scalars = append(scalars, append([]byte(nil), b[1:n+1]...))
+			b = b[n+1:]
+		case gbBytes1:
+			if len(b) < 2 {
+				goto corrupted
+			}
+
+			n := int(b[1])
+			b = b[2:]
+			if len(b) < n {
+				goto corrupted
+			}
+
+			scalars = append(scalars, append([]byte(nil), b[:n]...))
+			b = b[n:]
+		case gbBytes2:
+			if len(b) < 3 {
+				goto corrupted
+			}
+
+			n := int(b[1])<<8 | int(b[2]) + 1
+			b = b[3:]
+			if len(b) < n {
+				goto corrupted
+			}
+
+			scalars = append(scalars, append([]byte(nil), b[:n]...))
+			b = b[n:]
+		case gbString00, gbString01, gbString02, gbString03, gbString04,
+			gbString05, gbString06, gbString07, gbString08, gbString09,
+			gbString10, gbString11, gbString12, gbString13, gbString14,
+			gbString15, gbString16, gbString17:
+			n := int(tag - gbString00)
+			if len(b) < n+1 {
+				goto corrupted
+			}
+
+			scalars = append(scalars, string(b[1:n+1]))
+			b = b[n+1:]
+		case gbString1:
+			if len(b) < 2 {
+				goto corrupted
+			}
+
+			n := int(b[1])
+			b = b[2:]
+			if len(b) < n {
+				goto corrupted
+			}
+
+			scalars = append(scalars, string(b[:n]))
+			b = b[n:]
+		case gbString2:
+			if len(b) < 3 {
+				goto corrupted
+			}
+
+			n := int(b[1])<<8 | int(b[2])
+			b = b[3:]
+			if len(b) < n {
+				goto corrupted
+			}
+
+			scalars = append(scalars, string(b[:n]))
+			b = b[n:]
+		case gbUintP1, gbUintP2, gbUintP3, gbUintP4, gbUintP5, gbUintP6, gbUintP7, gbUintP8:
+			b = b[1:]
+			n := 1 + int(tag) - gbUintP1
+			if len(b) < n {
+				goto corrupted
+			}
+
+			var u uint64
+			for _, v := range b[:n] {
+				u = u<<8 | uint64(v)
+			}
+			scalars = append(scalars, u)
+			b = b[n:]
+		case gbIntM8, gbIntM7, gbIntM6, gbIntM5, gbIntM4, gbIntM3, gbIntM2, gbIntM1:
+			b = b[1:]
+			n := 8 - (int(tag) - gbIntM8)
+			if len(b) < n {
+				goto corrupted
+			}
+			u := uint64(math.MaxUint64)
+			for _, v := range b[:n] {
+				u = u<<8 | uint64(v)
+			}
+			scalars = append(scalars, int64(u))
+			b = b[n:]
+		case gbIntP1, gbIntP2, gbIntP3, gbIntP4, gbIntP5, gbIntP6, gbIntP7, gbIntP8:
+			b = b[1:]
+			n := 1 + int(tag) - gbIntP1
+			if len(b) < n {
+				goto corrupted
+			}
+
+			i := int64(0)
+			for _, v := range b[:n] {
+				i = i<<8 | int64(v)
+			}
+			scalars = append(scalars, i)
+			b = b[n:]
+		default:
+			scalars = append(scalars, int64(b[0])-gbInt0)
+			b = b[1:]
+		}
+	}
+	return append([]interface{}(nil), scalars...), nil
+
+corrupted:
+	return nil, &ErrDecodeScalars{append([]byte(nil), b0...), len(b0) - len(b)}
+}
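A round trip through the two functions above (an editorial sketch assuming the "fmt", "log" and lldb imports) shows the pairing: values go in as arbitrary predeclared scalars and come back as the canonical decoded types — int64/uint64/float64/complex128, bool, string, []byte and nil.

	b, err := lldb.EncodeScalars(nil, true, 42, "key", []byte{1, 2, 3})
	if err != nil {
		log.Fatal(err)
	}

	vals, err := lldb.DecodeScalars(b)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%#v\n", vals) // nil, true, int64(42), "key", []byte{1, 2, 3}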
+
+func collateComplex(x, y complex128) int {
+	switch rx, ry := real(x), real(y); {
+	case rx < ry:
+		return -1
+	case rx == ry:
+		switch ix, iy := imag(x), imag(y); {
+		case ix < iy:
+			return -1
+		case ix == iy:
+			return 0
+		case ix > iy:
+			return 1
+		}
+	}
+	//case rx > ry:
+	return 1
+}
+
+func collateFloat(x, y float64) int {
+	switch {
+	case x < y:
+		return -1
+	case x == y:
+		return 0
+	}
+	//case x > y:
+	return 1
+}
+
+func collateInt(x, y int64) int {
+	switch {
+	case x < y:
+		return -1
+	case x == y:
+		return 0
+	}
+	//case x > y:
+	return 1
+}
+
+func collateUint(x, y uint64) int {
+	switch {
+	case x < y:
+		return -1
+	case x == y:
+		return 0
+	}
+	//case x > y:
+	return 1
+}
+
+func collateIntUint(x int64, y uint64) int {
+	if y > math.MaxInt64 {
+		return -1
+	}
+
+	return collateInt(x, int64(y))
+}
+
+func collateUintInt(x uint64, y int64) int {
+	return -collateIntUint(y, x)
+}
+
+func collateType(i interface{}) (r interface{}, err error) {
+	switch x := i.(type) {
+	default:
+		return nil, fmt.Errorf("invalid collate type %T", x)
+	case nil:
+		return i, nil
+	case bool:
+		return i, nil
+	case int8:
+		return int64(x), nil
+	case int16:
+		return int64(x), nil
+	case int32:
+		return int64(x), nil
+	case int64:
+		return i, nil
+	case int:
+		return int64(x), nil
+	case uint8:
+		return uint64(x), nil
+	case uint16:
+		return uint64(x), nil
+	case uint32:
+		return uint64(x), nil
+	case uint64:
+		return i, nil
+	case uint:
+		return uint64(x), nil
+	case float32:
+		return float64(x), nil
+	case float64:
+		return i, nil
+	case complex64:
+		return complex128(x), nil
+	case complex128:
+		return i, nil
+	case []byte:
+		return i, nil
+	case string:
+		return i, nil
+	}
+}
+
+// Collate collates two arrays of Go predeclared scalar types (and the typeless
+// nil or []byte). If any other type appears in x or y, Collate will return a
+// non nil error.  String items are collated using strCollate or lexically
+// byte-wise (as when using Go comparison operators) when strCollate is nil.
+// []byte items are collated using bytes.Compare.
+//
+// Collate returns:
+//
+// 	-1 if x <  y
+// 	 0 if x == y
+// 	+1 if x >  y
+//
+// The same value as defined above must be returned from strCollate.
+//
+// The "outer" ordering is: nil, bool, number, []byte, string. IOW, nil is
+// "smaller" than anything else except other nil, numbers collate before
+// []byte, []byte collate before strings, etc.
+//
+// Integers and real numbers collate as expected in math. However, complex
+// numbers are not ordered in Go, so the ordering is defined here: complex
+// numbers are compared first by their real part only. Iff the real parts are
+// equal, the imaginary part determines the ordering. In this "second order"
+// comparison, integers and real numbers are treated as complex numbers with a
+// zero imaginary part.
+func Collate(x, y []interface{}, strCollate func(string, string) int) (r int, err error) {
+	nx, ny := len(x), len(y)
+
+	switch {
+	case nx == 0 && ny != 0:
+		return -1, nil
+	case nx == 0 && ny == 0:
+		return 0, nil
+	case nx != 0 && ny == 0:
+		return 1, nil
+	}
+
+	r = 1
+	if nx > ny {
+		x, y, r = y, x, -r
+	}
+
+	var c int
+	for i, xi0 := range x {
+		yi0 := y[i]
+		xi, err := collateType(xi0)
+		if err != nil {
+			return 0, err
+		}
+
+		yi, err := collateType(yi0)
+		if err != nil {
+			return 0, err
+		}
+
+		switch x := xi.(type) {
+		default:
+			panic(fmt.Errorf("internal error: %T", x))
+
+		case nil:
+			switch yi.(type) {
+			case nil:
+				// nop
+			default:
+				return -r, nil
+			}
+
+		case bool:
+			switch y := yi.(type) {
+			case nil:
+				return r, nil
+			case bool:
+				switch {
+				case !x && y:
+					return -r, nil
+				case x == y:
+					// nop
+				case x && !y:
+					return r, nil
+				}
+			default:
+				return -r, nil
+			}
+
+		case int64:
+			switch y := yi.(type) {
+			case nil, bool:
+				return r, nil
+			case int64:
+				c = collateInt(x, y)
+			case uint64:
+				c = collateIntUint(x, y)
+			case float64:
+				c = collateFloat(float64(x), y)
+			case complex128:
+				c = collateComplex(complex(float64(x), 0), y)
+			case []byte:
+				return -r, nil
+			case string:
+				return -r, nil
+			}
+
+			if c != 0 {
+				return c * r, nil
+			}
+
+		case uint64:
+			switch y := yi.(type) {
+			case nil, bool:
+				return r, nil
+			case int64:
+				c = collateUintInt(x, y)
+			case uint64:
+				c = collateUint(x, y)
+			case float64:
+				c = collateFloat(float64(x), y)
+			case complex128:
+				c = collateComplex(complex(float64(x), 0), y)
+			case []byte:
+				return -r, nil
+			case string:
+				return -r, nil
+			}
+
+			if c != 0 {
+				return c * r, nil
+			}
+
+		case float64:
+			switch y := yi.(type) {
+			case nil, bool:
+				return r, nil
+			case int64:
+				c = collateFloat(x, float64(y))
+			case uint64:
+				c = collateFloat(x, float64(y))
+			case float64:
+				c = collateFloat(x, y)
+			case complex128:
+				c = collateComplex(complex(x, 0), y)
+			case []byte:
+				return -r, nil
+			case string:
+				return -r, nil
+			}
+
+			if c != 0 {
+				return c * r, nil
+			}
+
+		case complex128:
+			switch y := yi.(type) {
+			case nil, bool:
+				return r, nil
+			case int64:
+				c = collateComplex(x, complex(float64(y), 0))
+			case uint64:
+				c = collateComplex(x, complex(float64(y), 0))
+			case float64:
+				c = collateComplex(x, complex(y, 0))
+			case complex128:
+				c = collateComplex(x, y)
+			case []byte:
+				return -r, nil
+			case string:
+				return -r, nil
+			}
+
+			if c != 0 {
+				return c * r, nil
+			}
+
+		case []byte:
+			switch y := yi.(type) {
+			case nil, bool, int64, uint64, float64, complex128:
+				return r, nil
+			case []byte:
+				c = bytes.Compare(x, y)
+			case string:
+				return -r, nil
+			}
+
+			if c != 0 {
+				return c * r, nil
+			}
+
+		case string:
+			switch y := yi.(type) {
+			case nil, bool, int64, uint64, float64, complex128:
+				return r, nil
+			case []byte:
+				return r, nil
+			case string:
+				switch {
+				case strCollate != nil:
+					c = strCollate(x, y)
+				case x < y:
+					return -r, nil
+				case x == y:
+					c = 0
+				case x > y:
+					return r, nil
+				}
+			}
+
+			if c != 0 {
+				return c * r, nil
+			}
+		}
+	}
+
+	if nx == ny {
+		return 0, nil
+	}
+
+	return -r, nil
+}
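An editorial sketch of the ordering rules above (same import assumptions as the previous sketch): nil sorts before everything else, numbers before []byte, []byte before string, and differently typed integers are compared numerically.

	r, err := lldb.Collate(
		[]interface{}{int8(1), "a"},
		[]interface{}{uint64(1), "b"},
		nil, // nil strCollate: byte-wise string comparison
	)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(r) // -1: 1 == 1 in the first position, then "a" < "b"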

+ 160 - 0
vendor/github.com/cznic/lldb/lldb.go

@@ -0,0 +1,160 @@
+// Copyright 2014 The lldb Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package lldb implements a low level database engine. The database model used
+// could be considered a specific implementation of some small(est)
+// intersection of models listed in [1]. As a settled term is lacking, it'll be
+// called here a 'Virtual memory model' (VMM).
+//
+// Changelog
+//
+// 2016-07-24: v1.0.4 brings some performance improvements.
+//
+// 2016-07-22: v1.0.3 brings some small performance improvements.
+//
+// 2016-07-12: v1.0.2 now uses packages from cznic/internal.
+//
+// 2016-07-12: v1.0.1 adds a license for testdata/fortunes.txt.
+//
+// 2016-07-11: First standalone release v1.0.0 of the package previously
+// published as experimental (github.com/cznic/exp/lldb).
+//
+// Filers
+//
+// A Filer is an abstraction of storage. A Filer may be a part of some process'
+// virtual address space, an OS file, a networked, remote file etc. Persistence
+// of the storage is optional, opaque to VMM and it is specific to a concrete
+// Filer implementation.
+//
+// Space management
+//
+// Mechanism to allocate, reallocate (resize), deallocate (and later reclaim
+// the unused) contiguous parts of a Filer, called blocks.  Blocks are
+// identified and referred to by a handle, an int64.
+//
+// BTrees
+//
+// In addition to the VMM like services, lldb provides volatile and
+// non-volatile BTrees. Keys and values of a BTree are limited in size to 64kB
+// each (a bit more actually). Support for larger keys/values, if desired, can
+// be built atop a BTree to certain limits.
+//
+// Handles vs pointers
+//
+// A handle is the abstracted storage counterpart of a memory address.  There
+// is one fundamental difference, though. Resizing a block never results in a
+// change to the handle which refers to the resized block, so a handle is more
+// akin to a unique numeric id/key. Yet it shares one property of pointers -
+// handles can be associated again with blocks after the original handle block
+// was deallocated. In other words, a handle uniqueness domain is the state of
+// the database and is not something comparable to e.g. an ever growing
+// numbering sequence.
+//
+// Also, as with memory pointers, dangling handles can be created and blocks
+// overwritten when such handles are used. Using a zero handle to refer to a
+// block will not panic; however, the resulting error is effectively the same
+// exceptional situation as dereferencing a nil pointer.
+//
+// Blocks
+//
+// Allocated/used blocks are limited in size to only a little bit more than
+// 64kB.  Bigger semantic entities/structures must be built in lldb's client
+// code.  The content of a block has no semantics attached; it's only a fully
+// opaque `[]byte`.
+//
+// Scalars
+//
+// Use of "scalars" applies to EncodeScalars, DecodeScalars and Collate. Those
+// first two "to bytes" and "from bytes" functions are suggested for handling
+// multi-valued Allocator content items and/or keys/values of BTrees (using
+// Collate for keys). Types called "scalar" are:
+//
+//	nil (the typeless one)
+//	bool
+//	all integral types: [u]int8, [u]int16, [u]int32, [u]int, [u]int64
+//	all floating point types: float32, float64
+//	all complex types: complex64, complex128
+//	[]byte (64kB max)
+//	string (64kB max)
+//
+// Specific implementations
+//
+// Included are concrete implementations of some of the VMM interfaces,
+// provided to ease serving simple client code, for testing and possibly as an
+// example.  More details in the documentation of such implementations.
+//
+//  [1]: http://en.wikipedia.org/wiki/Database_model
+package lldb
+
+const (
+	fltSz            = 0x70 // size of the FLT
+	maxShort         = 251
+	maxRq            = 65787
+	maxFLTRq         = 4112
+	maxHandle        = 1<<56 - 1
+	atomLen          = 16
+	tagUsedLong      = 0xfc
+	tagUsedRelocated = 0xfd
+	tagFreeShort     = 0xfe
+	tagFreeLong      = 0xff
+	tagNotCompressed = 0
+	tagCompressed    = 1
+)
+
+// Content size n -> blocksize in atoms.
+func n2atoms(n int) int {
+	if n > maxShort {
+		n += 2
+	}
+	return (n+1)/16 + 1
+}
+
+// Content size n -> number of padding zeros.
+func n2padding(n int) int {
+	if n > maxShort {
+		n += 2
+	}
+	return 15 - (n+1)&15
+}
+
+// Handle <-> offset
+func h2off(h int64) int64   { return (h + 6) * 16 }
+func off2h(off int64) int64 { return off/16 - 6 }
+
+// Get a 7B int64 from b
+func b2h(b []byte) (h int64) {
+	for _, v := range b[:7] {
+		h = h<<8 | int64(v)
+	}
+	return
+}
+
+// Put a 7B int64 into b
+func h2b(b []byte, h int64) []byte {
+	for i := range b[:7] {
+		b[i], h = byte(h>>48), h<<8
+	}
+	return b
+}
+
+// Content length N (must be in [252, 65787]) to long used block M field.
+func n2m(n int) (m int) {
+	return n % 0x10000
+}
+
+// Long used block M (must be in [0, 65535]) field to content length N.
+func m2n(m int) (n int) {
+	if m <= maxShort {
+		m += 0x10000
+	}
+	return m
+}
+
+func bpack(a []byte) []byte {
+	if cap(a) > len(a) {
+		return append([]byte(nil), a...)
+	}
+
+	return a
+}
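
The size and handle helpers above are plain arithmetic: content is rounded up to 16-byte atoms (long blocks reserve two extra bytes for a length field) and handles map to file offsets past a fixed 6-atom header. A self-contained sketch that mirrors that arithmetic for illustration only; the constants and formulas are copied from the block above, not imported:

	package main

	import "fmt"

	const maxShort = 251 // copied from lldb.go above, for illustration

	// n2atoms mirrors the content-size -> block-size-in-atoms helper above.
	func n2atoms(n int) int {
		if n > maxShort {
			n += 2 // long blocks carry an extra 2-byte length field
		}
		return (n+1)/16 + 1
	}

	// h2off mirrors the handle -> file offset mapping above.
	func h2off(h int64) int64 { return (h + 6) * 16 }

	func main() {
		fmt.Println(n2atoms(10))  // 1: 10 bytes of content fit in a single atom
		fmt.Println(n2atoms(300)) // 19: long content needs the extra length field
		fmt.Println(h2off(1))     // 112: handle 1 maps past the 6-atom file header
	}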

+ 107 - 0
vendor/github.com/cznic/lldb/memfiler.go

@@ -0,0 +1,107 @@
+// Copyright 2014 The lldb Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// A memory-only implementation of Filer.
+
+package lldb
+
+import (
+	"fmt"
+	"io"
+
+	"github.com/cznic/internal/file"
+)
+
+const (
+	pgBits = 16
+	pgSize = 1 << pgBits
+	pgMask = pgSize - 1
+)
+
+var _ Filer = &MemFiler{}
+
+// MemFiler is a memory backed Filer. It implements BeginUpdate, EndUpdate and
+// Rollback as no-ops. MemFiler is not automatically persistent, but it has
+// ReadFrom and WriteTo methods.
+type MemFiler struct {
+	fi   file.Interface
+	nest int
+}
+
+// NewMemFiler returns a new MemFiler.
+func NewMemFiler() *MemFiler {
+	fi, err := file.OpenMem("")
+	if err != nil {
+		return nil
+	}
+
+	return &MemFiler{fi: fi}
+}
+
+// BeginUpdate implements Filer.
+func (f *MemFiler) BeginUpdate() error {
+	f.nest++
+	return nil
+}
+
+// Close implements Filer.
+func (f *MemFiler) Close() (err error) {
+	if f.nest != 0 {
+		return &ErrPERM{(f.Name() + ":Close")}
+	}
+
+	return f.fi.Close()
+}
+
+// EndUpdate implements Filer.
+func (f *MemFiler) EndUpdate() (err error) {
+	if f.nest == 0 {
+		return &ErrPERM{(f.Name() + ": EndUpdate")}
+	}
+
+	f.nest--
+	return
+}
+
+// Name implements Filer.
+func (f *MemFiler) Name() string { return fmt.Sprintf("%p.memfiler", f) }
+
+// PunchHole implements Filer.
+func (f *MemFiler) PunchHole(off, size int64) (err error) { return nil }
+
+// ReadAt implements Filer.
+func (f *MemFiler) ReadAt(b []byte, off int64) (n int, err error) { return f.fi.ReadAt(b, off) }
+
+// ReadFrom is a helper to populate MemFiler's content from r.  'n' reports the
+// number of bytes read from 'r'.
+func (f *MemFiler) ReadFrom(r io.Reader) (n int64, err error) { return f.fi.ReadFrom(r) }
+
+// Rollback implements Filer.
+func (f *MemFiler) Rollback() (err error) { return nil }
+
+// Size implements Filer.
+func (f *MemFiler) Size() (int64, error) {
+	info, err := f.fi.Stat()
+	if err != nil {
+		return 0, err
+	}
+
+	return info.Size(), nil
+}
+
+// Sync implements Filer.
+func (f *MemFiler) Sync() error { return nil }
+
+// Truncate implements Filer.
+func (f *MemFiler) Truncate(size int64) (err error) { return f.fi.Truncate(size) }
+
+// WriteAt implements Filer.
+func (f *MemFiler) WriteAt(b []byte, off int64) (n int, err error) { return f.fi.WriteAt(b, off) }
+
+// WriteTo is a helper to copy/persist MemFiler's content to w.  If w is also
+// an io.WriterAt then WriteTo may attempt to skip writing sufficiently large
+// runs of zeros, i.e. it will attempt to punch holes, where possible, in `w`
+// if that happens to be a freshly created or zero-length truncated OS file.
+// 'n' reports the number of bytes written to 'w'.
+func (f *MemFiler) WriteTo(w io.Writer) (n int64, err error) { return f.fi.WriteTo(w) }
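
MemFiler behaves like an in-memory sparse file: WriteAt grows it as needed, ReadAt reads the data back, and WriteTo/ReadFrom cover optional persistence. A minimal usage sketch (not part of the vendored source) based only on the methods shown above; reading exactly up to the end of the filer may report io.EOF, which the sketch tolerates:

	package main

	import (
		"bytes"
		"fmt"
		"io"
		"log"

		"github.com/cznic/lldb"
	)

	func main() {
		f := lldb.NewMemFiler()

		// Sparse write at a 1 MiB offset; the filer grows accordingly.
		if _, err := f.WriteAt([]byte("hello"), 1<<20); err != nil {
			log.Fatal(err)
		}

		b := make([]byte, 5)
		if _, err := f.ReadAt(b, 1<<20); err != nil && err != io.EOF {
			log.Fatal(err)
		}
		fmt.Printf("%s\n", b) // hello

		// Persist the whole content to any io.Writer.
		var buf bytes.Buffer
		if _, err := f.WriteTo(&buf); err != nil {
			log.Fatal(err)
		}
		fmt.Println(buf.Len())
	}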

+ 130 - 0
vendor/github.com/cznic/lldb/osfiler.go

@@ -0,0 +1,130 @@
+// Copyright 2014 The lldb Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lldb
+
+import (
+	"io"
+	"os"
+
+	"github.com/cznic/mathutil"
+)
+
+var _ Filer = (*OSFiler)(nil)
+
+// OSFile is an os.File-like minimal set of methods allowing a Filer to be
+// constructed.
+type OSFile interface {
+	Name() string
+	Stat() (fi os.FileInfo, err error)
+	Sync() (err error)
+	Truncate(size int64) (err error)
+	io.Closer
+	io.Reader
+	io.ReaderAt
+	io.Seeker
+	io.Writer
+	io.WriterAt
+}
+
+// OSFiler is like a SimpleFileFiler but based on an OSFile.
+type OSFiler struct {
+	f    OSFile
+	nest int
+	size int64 // not set if < 0
+}
+
+// NewOSFiler returns a Filer from an OSFile. This Filer is like the
+// SimpleFileFiler: it does not implement the transaction-related methods.
+func NewOSFiler(f OSFile) (r *OSFiler) {
+	return &OSFiler{
+		f:    f,
+		size: -1,
+	}
+}
+
+// BeginUpdate implements Filer.
+func (f *OSFiler) BeginUpdate() (err error) {
+	f.nest++
+	return nil
+}
+
+// Close implements Filer.
+func (f *OSFiler) Close() (err error) {
+	if f.nest != 0 {
+		return &ErrPERM{(f.Name() + ":Close")}
+	}
+
+	return f.f.Close()
+}
+
+// EndUpdate implements Filer.
+func (f *OSFiler) EndUpdate() (err error) {
+	if f.nest == 0 {
+		return &ErrPERM{(f.Name() + ":EndUpdate")}
+	}
+
+	f.nest--
+	return
+}
+
+// Name implements Filer.
+func (f *OSFiler) Name() string {
+	return f.f.Name()
+}
+
+// PunchHole implements Filer.
+func (f *OSFiler) PunchHole(off, size int64) (err error) {
+	return
+}
+
+// ReadAt implements Filer.
+func (f *OSFiler) ReadAt(b []byte, off int64) (n int, err error) {
+	return f.f.ReadAt(b, off)
+}
+
+// Rollback implements Filer.
+func (f *OSFiler) Rollback() (err error) { return }
+
+// Size implements Filer.
+func (f *OSFiler) Size() (n int64, err error) {
+	if f.size < 0 { // boot
+		fi, err := f.f.Stat()
+		if err != nil {
+			return 0, err
+		}
+
+		f.size = fi.Size()
+	}
+	return f.size, nil
+}
+
+// Sync implements Filer.
+func (f *OSFiler) Sync() (err error) {
+	return f.f.Sync()
+}
+
+// Truncate implements Filer.
+func (f *OSFiler) Truncate(size int64) (err error) {
+	if size < 0 {
+		return &ErrINVAL{"Truncate size", size}
+	}
+
+	f.size = size
+	return f.f.Truncate(size)
+}
+
+// WriteAt implements Filer.
+func (f *OSFiler) WriteAt(b []byte, off int64) (n int, err error) {
+	if f.size < 0 { // boot
+		fi, err := os.Stat(f.f.Name())
+		if err != nil {
+			return 0, err
+		}
+
+		f.size = fi.Size()
+	}
+	f.size = mathutil.MaxInt64(f.size, int64(len(b))+off)
+	return f.f.WriteAt(b, off)
+}
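
OSFiler adapts anything satisfying the OSFile interface, *os.File included, into a Filer without transactional guarantees. A minimal usage sketch (not part of the vendored source); the "data.db" path is only an example:

	package main

	import (
		"log"
		"os"

		"github.com/cznic/lldb"
	)

	func main() {
		f, err := os.OpenFile("data.db", os.O_RDWR|os.O_CREATE, 0666)
		if err != nil {
			log.Fatal(err)
		}

		filer := lldb.NewOSFiler(f) // *os.File satisfies the OSFile interface
		defer filer.Close()         // Close is forwarded to the underlying file

		if _, err := filer.WriteAt([]byte{1, 2, 3}, 0); err != nil {
			log.Fatal(err)
		}
		if err := filer.Sync(); err != nil {
			log.Fatal(err)
		}
	}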

+ 99 - 0
vendor/github.com/cznic/lldb/simplefilefiler.go

@@ -0,0 +1,99 @@
+// Copyright 2014 The lldb Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// A basic os.File backed Filer.
+
+package lldb
+
+import (
+	"os"
+
+	"github.com/cznic/internal/file"
+)
+
+var _ Filer = &SimpleFileFiler{}
+
+// SimpleFileFiler is an os.File backed Filer intended for use where structural
+// consistency can be reached by other means (SimpleFileFiler is, for example,
+// wrapped in a RollbackFiler or ACIDFiler0) or where persistence is not
+// required (temporary/working data sets).
+//
+// SimpleFileFiler is the simplest os.File backed Filer implementation as it
+// does not really implement BeginUpdate and EndUpdate/Rollback in any way
+// which would protect the structural integrity of data. If misused e.g. as a
+// real database storage w/o other measures, it can easily cause data loss
+// when, for example, a power outage occurs or the updating process terminates
+// abruptly.
+type SimpleFileFiler struct {
+	fi   file.Interface
+	name string
+	nest int
+}
+
+// NewSimpleFileFiler returns a new SimpleFileFiler.
+func NewSimpleFileFiler(f *os.File) *SimpleFileFiler {
+	fi, err := file.Open(f)
+	if err != nil {
+		return nil
+	}
+
+	sf := &SimpleFileFiler{fi: fi, name: f.Name()}
+	return sf
+}
+
+// BeginUpdate implements Filer.
+func (f *SimpleFileFiler) BeginUpdate() error {
+	f.nest++
+	return nil
+}
+
+// Close implements Filer.
+func (f *SimpleFileFiler) Close() (err error) {
+	if f.nest != 0 {
+		return &ErrPERM{(f.Name() + ":Close")}
+	}
+
+	return f.fi.Close()
+}
+
+// EndUpdate implements Filer.
+func (f *SimpleFileFiler) EndUpdate() (err error) {
+	if f.nest == 0 {
+		return &ErrPERM{(f.Name() + ":EndUpdate")}
+	}
+
+	f.nest--
+	return
+}
+
+// Name implements Filer.
+func (f *SimpleFileFiler) Name() string { return f.name }
+
+// PunchHole implements Filer.
+func (f *SimpleFileFiler) PunchHole(off, size int64) (err error) { return nil }
+
+// ReadAt implements Filer.
+func (f *SimpleFileFiler) ReadAt(b []byte, off int64) (n int, err error) { return f.fi.ReadAt(b, off) }
+
+// Rollback implements Filer.
+func (f *SimpleFileFiler) Rollback() (err error) { return nil }
+
+// Size implements Filer.
+func (f *SimpleFileFiler) Size() (int64, error) {
+	info, err := f.fi.Stat()
+	if err != nil {
+		return 0, err
+	}
+
+	return info.Size(), nil
+}
+
+// Sync implements Filer.
+func (f *SimpleFileFiler) Sync() error { return f.fi.Sync() }
+
+// Truncate implements Filer.
+func (f *SimpleFileFiler) Truncate(size int64) (err error) { return f.fi.Truncate(size) }
+
+// WriteAt implements Filer.
+func (f *SimpleFileFiler) WriteAt(b []byte, off int64) (n int, err error) { return f.fi.WriteAt(b, off) }
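
Because it offers no structural protection on its own, SimpleFileFiler is best reserved for temporary or working data sets, or wrapped in a transactional Filer such as the RollbackFiler defined in the next file. A minimal stand-alone sketch (not part of the vendored source); "scratch.db" is only an example path:

	package main

	import (
		"log"
		"os"

		"github.com/cznic/lldb"
	)

	func main() {
		f, err := os.OpenFile("scratch.db", os.O_RDWR|os.O_CREATE, 0666)
		if err != nil {
			log.Fatal(err)
		}

		filer := lldb.NewSimpleFileFiler(f) // no crash safety by itself
		defer filer.Close()

		if _, err := filer.WriteAt([]byte("working set"), 0); err != nil {
			log.Fatal(err)
		}

		sz, err := filer.Size()
		if err != nil {
			log.Fatal(err)
		}
		log.Println("size:", sz)
	}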

+ 616 - 0
vendor/github.com/cznic/lldb/xact.go

@@ -0,0 +1,616 @@
+// Copyright 2014 The lldb Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Structural transactions.
+
+package lldb
+
+//DONE+ TransactionalMemoryFiler
+//	----
+//	Use NewRollbackFiler(myMemFiler, ...)
+
+/*
+
+bfBits: 3
+BenchmarkRollbackFiler	20000000	       102 ns/op	   9.73 MB/s
+
+bfBits: 4
+BenchmarkRollbackFiler	50000000	        55.7 ns/op	  17.95 MB/s
+
+bfBits: 5
+BenchmarkRollbackFiler	100000000	        32.2 ns/op	  31.06 MB/s
+
+bfBits: 6
+BenchmarkRollbackFiler	100000000	        20.6 ns/op	  48.46 MB/s
+
+bfBits: 7
+BenchmarkRollbackFiler	100000000	        15.1 ns/op	  66.12 MB/s
+
+bfBits: 8
+BenchmarkRollbackFiler	100000000	        10.5 ns/op	  95.66 MB/s
+
+bfBits: 9
+BenchmarkRollbackFiler	200000000	         8.02 ns/op	 124.74 MB/s
+
+bfBits: 10
+BenchmarkRollbackFiler	200000000	         9.25 ns/op	 108.09 MB/s
+
+bfBits: 11
+BenchmarkRollbackFiler	100000000	        11.7 ns/op	  85.47 MB/s
+
+bfBits: 12
+BenchmarkRollbackFiler	100000000	        17.2 ns/op	  57.99 MB/s
+
+bfBits: 13
+BenchmarkRollbackFiler	100000000	        32.7 ns/op	  30.58 MB/s
+
+bfBits: 14
+BenchmarkRollbackFiler	50000000	        39.6 ns/op	  25.27 MB/s
+
+*/
+
+import (
+	"fmt"
+	"io"
+	"sync"
+
+	"github.com/cznic/fileutil"
+	"github.com/cznic/internal/buffer"
+	"github.com/cznic/mathutil"
+)
+
+var (
+	_ Filer = &bitFiler{}      // Ensure bitFiler is a Filer.
+	_ Filer = &RollbackFiler{} // ditto
+)
+
+const (
+	bfBits = 12
+	bfSize = 1 << bfBits
+	bfMask = bfSize - 1
+)
+
+type (
+	bitPage struct {
+		prev, next *bitPage
+		pdata      *[]byte
+		data       []byte
+		dirty      bool
+	}
+
+	bitFilerMap map[int64]*bitPage
+
+	bitFiler struct {
+		parent Filer
+		m      bitFilerMap
+		size   int64
+	}
+)
+
+func newBitFiler(parent Filer) (f *bitFiler, err error) {
+	sz, err := parent.Size()
+	if err != nil {
+		return
+	}
+
+	return &bitFiler{parent: parent, m: bitFilerMap{}, size: sz}, nil
+}
+
+func (f *bitFiler) BeginUpdate() error { panic("internal error") }
+func (f *bitFiler) EndUpdate() error   { panic("internal error") }
+func (f *bitFiler) Rollback() error    { panic("internal error") }
+func (f *bitFiler) Sync() error        { panic("internal error") }
+
+func (f *bitFiler) Close() (err error)   { return }
+func (f *bitFiler) Name() string         { return fmt.Sprintf("%p.bitfiler", f) }
+func (f *bitFiler) Size() (int64, error) { return f.size, nil }
+
+func (f *bitFiler) free() {
+	for _, pg := range f.m {
+		buffer.Put(pg.pdata)
+	}
+}
+
+func (f *bitFiler) PunchHole(off, size int64) (err error) {
+	first := off >> bfBits
+	if off&bfMask != 0 {
+		first++
+	}
+	off += size - 1
+	last := off >> bfBits
+	if off&bfMask != 0 {
+		last--
+	}
+	if limit := f.size >> bfBits; last > limit {
+		last = limit
+	}
+	for pgI := first; pgI <= last; pgI++ {
+		pg := &bitPage{}
+		pg.pdata = buffer.CGet(bfSize)
+		pg.data = *pg.pdata
+		pg.dirty = true
+		f.m[pgI] = pg
+	}
+	return
+}
+
+func (f *bitFiler) ReadAt(b []byte, off int64) (n int, err error) {
+	avail := f.size - off
+	pgI := off >> bfBits
+	pgO := int(off & bfMask)
+	rem := len(b)
+	if int64(rem) >= avail {
+		rem = int(avail)
+		err = io.EOF
+	}
+	for rem != 0 && avail > 0 {
+		pg := f.m[pgI]
+		if pg == nil {
+			pg = &bitPage{}
+			pg.pdata = buffer.CGet(bfSize)
+			pg.data = *pg.pdata
+			if f.parent != nil {
+				_, err = f.parent.ReadAt(pg.data, off&^bfMask)
+				if err != nil && !fileutil.IsEOF(err) {
+					return
+				}
+
+				err = nil
+			}
+			f.m[pgI] = pg
+		}
+		nc := copy(b[:mathutil.Min(rem, bfSize)], pg.data[pgO:])
+		pgI++
+		pgO = 0
+		rem -= nc
+		n += nc
+		b = b[nc:]
+		off += int64(nc)
+	}
+	return
+}
+
+func (f *bitFiler) Truncate(size int64) (err error) {
+	switch {
+	case size < 0:
+		return &ErrINVAL{"Truncate size", size}
+	case size == 0:
+		f.m = bitFilerMap{}
+		f.size = 0
+		return
+	}
+
+	first := size >> bfBits
+	if size&bfMask != 0 {
+		first++
+	}
+	last := f.size >> bfBits
+	if f.size&bfMask != 0 {
+		last++
+	}
+	for ; first < last; first++ {
+		if bp, ok := f.m[first]; ok {
+			buffer.Put(bp.pdata)
+		}
+		delete(f.m, first)
+	}
+
+	f.size = size
+	return
+}
+
+func (f *bitFiler) WriteAt(b []byte, off int64) (n int, err error) {
+	off0 := off
+	pgI := off >> bfBits
+	pgO := int(off & bfMask)
+	n = len(b)
+	rem := n
+	var nc int
+	for rem != 0 {
+		pg := f.m[pgI]
+		if pg == nil {
+			pg = &bitPage{}
+			pg.pdata = buffer.CGet(bfSize)
+			pg.data = *pg.pdata
+			if f.parent != nil {
+				_, err = f.parent.ReadAt(pg.data, off&^bfMask)
+				if err != nil && !fileutil.IsEOF(err) {
+					return
+				}
+
+				err = nil
+			}
+			f.m[pgI] = pg
+		}
+		nc = copy(pg.data[pgO:], b)
+		pgI++
+		pg.dirty = true
+		pgO = 0
+		rem -= nc
+		b = b[nc:]
+		off += int64(nc)
+	}
+	f.size = mathutil.MaxInt64(f.size, off0+int64(n))
+	return
+}
+
+func (f *bitFiler) link() {
+	for pgI, pg := range f.m {
+		nx, ok := f.m[pgI+1]
+		if !ok || !nx.dirty {
+			continue
+		}
+
+		nx.prev, pg.next = pg, nx
+	}
+}
+
+func (f *bitFiler) dumpDirty(w io.WriterAt) (nwr int, err error) {
+	f.link()
+	for pgI, pg := range f.m {
+		if !pg.dirty {
+			continue
+		}
+
+		for pg.prev != nil && pg.prev.dirty {
+			pg = pg.prev
+			pgI--
+		}
+
+		for pg != nil && pg.dirty {
+			if _, err := w.WriteAt(pg.data, pgI<<bfBits); err != nil {
+				return 0, err
+			}
+
+			nwr++
+			pg.dirty = false
+			pg = pg.next
+			pgI++
+		}
+	}
+	return
+}
+
+// RollbackFiler is a Filer implementing structural transaction handling.
+// Structural transactions should be small and short-lived because all
+// non-committed data are held in memory until committed or discarded by a
+// Rollback.
+//
+// While using RollbackFiler, every intended update of the wrapped Filer, by
+// WriteAt, Truncate or PunchHole, _must_ be made within a transaction.
+// Attempts to do it outside of a transaction will return ErrPERM. OTOH,
+// invoking ReadAt outside of a transaction is not a problem.
+//
+// No nested transactions: All updates within a transaction are held in memory.
+// On a matching EndUpdate the updates held in memory are actually written to
+// the wrapped Filer.
+//
+// Nested transactions: Correct data will be seen from RollbackFiler when any
+// level of a nested transaction is rolled back. The actual writing to the
+// wrapped Filer happens only when the outer most transaction nesting level is
+// closed.
+//
+// Invoking Rollback is an alternative to EndUpdate. It discards all changes
+// made at the current transaction level and returns the "state" (possibly not
+// yet persisted) of the Filer to what it was before the corresponding
+// BeginUpdate.
+//
+// During an open transaction, all reads (using ReadAt) are "dirty" reads,
+// seeing the uncommitted changes made to the Filer's data.
+//
+// Lldb databases should be based upon a RollbackFiler.
+//
+// With a wrapped MemFiler one gets transactional memory. With, for example, a
+// wrapped disk based SimpleFileFiler it protects against at least some HW
+// errors - if Rollback is properly invoked on such failures and/or if there's
+// some WAL or 2PC or whatever other safe mechanism based recovery procedure
+// used by the client.
+//
+// The "real" writes to the wrapped Filer (or WAL instead) go through the
+// writerAt supplied to NewRollbackFiler.
+//
+// List of functions/methods which are recommended to be wrapped in a
+// BeginUpdate/EndUpdate structural transaction:
+//
+// 	Allocator.Alloc
+// 	Allocator.Free
+// 	Allocator.Realloc
+//
+//	CreateBTree
+// 	RemoveBTree
+// 	BTree.Clear
+// 	BTree.Delete
+// 	BTree.DeleteAny
+// 	BTree.Clear
+// 	BTree.Extract
+// 	BTree.Get (it can mutate the DB)
+// 	BTree.Put
+// 	BTree.Set
+//
+// NOTE: RollbackFiler is a generic solution intended to wrap Filers provided
+// by this package which do not implement any of the transactional methods.
+// RollbackFiler thus _does not_ invoke any of the transactional methods of its
+// wrapped Filer.
+//
+// RollbackFiler is safe for concurrent use by multiple goroutines.
+type RollbackFiler struct {
+	mu           sync.RWMutex
+	inCallback   bool
+	inCallbackMu sync.RWMutex
+	bitFiler     *bitFiler
+	checkpoint   func(int64) error
+	closed       bool
+	f            Filer
+	parent       Filer
+	tlevel       int // transaction nesting level, 0 == not in transaction
+	writerAt     io.WriterAt
+
+	// afterRollback, if not nil, is called after performing Rollback
+	// without errors.
+	afterRollback func() error
+}
+
+// NewRollbackFiler returns a RollbackFiler wrapping f.
+//
+// The checkpoint parameter
+//
+// The checkpoint function is called after closing (by EndUpdate) the upper
+// most level open transaction if all calls of writerAt were successful and the
+// DB (or eg. a WAL) is thus now in a consistent state (virtually, in the ideal
+// world with no write caches, no HW failures, no process crashes, ...).
+//
+// NOTE: In, for example, a 2PC it is necessary to reflect also the sz
+// parameter as the new file size (as in the parameter to Truncate). All
+// changes were successfully written already by writerAt before invoking
+// checkpoint.
+//
+// The writerAt parameter
+//
+// The writerAt interface is used to commit the updates of the wrapped Filer.
+// If any invocation of writerAt fails then a non nil error will be returned
+// from EndUpdate and checkpoint will _not_ be called.  Neither is it necessary to
+// call Rollback. The rule of thumb: The [structural] transaction [level] is
+// closed by invoking exactly once one of EndUpdate _or_ Rollback.
+//
+// It is presumed that writerAt uses WAL or 2PC or whatever other safe
+// mechanism to physically commit the updates.
+//
+// Updates performed by invocations of writerAt are byte-precise, but not
+// necessarily maximum possible length precise. IOW, for example an update
+// crossing page boundaries may be performed by more than one writerAt
+// invocation.  No offset sorting is performed.  This may change if it proves
+// to be a problem. Such change would be considered backward compatible.
+//
+// NOTE: Using RollbackFiler, but failing to ever invoke a matching "closing"
+// EndUpdate after an "opening" BeginUpdate means neither writerAt nor
+// checkpoint will ever get called - with all the possible data loss
+// consequences.
+func NewRollbackFiler(f Filer, checkpoint func(sz int64) error, writerAt io.WriterAt) (r *RollbackFiler, err error) {
+	if f == nil || checkpoint == nil || writerAt == nil {
+		return nil, &ErrINVAL{Src: "lldb.NewRollbackFiler, nil argument"}
+	}
+
+	return &RollbackFiler{
+		checkpoint: checkpoint,
+		f:          f,
+		writerAt:   writerAt,
+	}, nil
+}
+
+// Implements Filer.
+func (r *RollbackFiler) BeginUpdate() (err error) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	parent := r.f
+	if r.tlevel != 0 {
+		parent = r.bitFiler
+	}
+	r.bitFiler, err = newBitFiler(parent)
+	if err != nil {
+		return
+	}
+
+	r.tlevel++
+	return
+}
+
+// Implements Filer.
+//
+// Close will return an error if not invoked at nesting level 0.  However, to
+// allow emergency closing from eg. a signal handler, if Close is invoked
+// within open transaction(s), it rolls back any non-committed open
+// transactions and performs the Close operation.
+//
+// IOW: Regardless of the transaction nesting level the Close is always
+// performed but any uncommitted transaction data are lost.
+func (r *RollbackFiler) Close() (err error) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	if r.closed {
+		return &ErrPERM{r.f.Name() + ": Already closed"}
+	}
+
+	r.closed = true
+	if err = r.f.Close(); err != nil {
+		return
+	}
+
+	if r.tlevel != 0 {
+		err = &ErrPERM{r.f.Name() + ": Close inside an open transaction"}
+	}
+
+	if r.bitFiler != nil {
+		r.bitFiler.free()
+		r.bitFiler = nil
+	}
+
+	return
+}
+
+// Implements Filer.
+func (r *RollbackFiler) EndUpdate() (err error) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	if r.tlevel == 0 {
+		return &ErrPERM{r.f.Name() + " : EndUpdate outside of a transaction"}
+	}
+
+	sz, err := r.size() // Cannot call .Size() -> deadlock
+	if err != nil {
+		return
+	}
+
+	r.tlevel--
+	bf := r.bitFiler
+	parent := bf.parent
+	w := r.writerAt
+	if r.tlevel != 0 {
+		w = parent
+	}
+	nwr, err := bf.dumpDirty(w)
+	if err != nil {
+		return
+	}
+
+	switch {
+	case r.tlevel == 0:
+		defer func() {
+			r.bitFiler.free()
+			r.bitFiler = nil
+		}()
+
+		if nwr == 0 {
+			return
+		}
+
+		return r.checkpoint(sz)
+	default:
+		r.bitFiler.free()
+		r.bitFiler = parent.(*bitFiler)
+		sz, _ := bf.Size() // bitFiler.Size() never returns err != nil
+		return parent.Truncate(sz)
+	}
+}
+
+// Implements Filer.
+func (r *RollbackFiler) Name() string {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+
+	return r.f.Name()
+}
+
+// Implements Filer.
+func (r *RollbackFiler) PunchHole(off, size int64) error {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	if r.tlevel == 0 {
+		return &ErrPERM{r.f.Name() + ": PunchHole outside of a transaction"}
+	}
+
+	if off < 0 {
+		return &ErrINVAL{r.f.Name() + ": PunchHole off", off}
+	}
+
+	if size < 0 || off+size > r.bitFiler.size {
+		return &ErrINVAL{r.f.Name() + ": PunchHole size", size}
+	}
+
+	return r.bitFiler.PunchHole(off, size)
+}
+
+// Implements Filer.
+func (r *RollbackFiler) ReadAt(b []byte, off int64) (n int, err error) {
+	r.inCallbackMu.RLock()
+	defer r.inCallbackMu.RUnlock()
+	if !r.inCallback {
+		r.mu.RLock()
+		defer r.mu.RUnlock()
+	}
+	if r.tlevel == 0 {
+		return r.f.ReadAt(b, off)
+	}
+
+	return r.bitFiler.ReadAt(b, off)
+}
+
+// Implements Filer.
+func (r *RollbackFiler) Rollback() (err error) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	if r.tlevel == 0 {
+		return &ErrPERM{r.f.Name() + ": Rollback outside of a transaction"}
+	}
+
+	if r.tlevel > 1 {
+		r.bitFiler.free()
+		r.bitFiler = r.bitFiler.parent.(*bitFiler)
+	}
+	r.tlevel--
+	if f := r.afterRollback; f != nil {
+		r.inCallbackMu.Lock()
+		r.inCallback = true
+		r.inCallbackMu.Unlock()
+		defer func() {
+			r.inCallbackMu.Lock()
+			r.inCallback = false
+			r.inCallbackMu.Unlock()
+		}()
+		return f()
+	}
+	return
+}
+
+func (r *RollbackFiler) size() (sz int64, err error) {
+	if r.tlevel == 0 {
+		return r.f.Size()
+	}
+
+	return r.bitFiler.Size()
+}
+
+// Implements Filer.
+func (r *RollbackFiler) Size() (sz int64, err error) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	return r.size()
+}
+
+// Implements Filer.
+func (r *RollbackFiler) Sync() error {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	return r.f.Sync()
+}
+
+// Implements Filer.
+func (r *RollbackFiler) Truncate(size int64) error {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	if r.tlevel == 0 {
+		return &ErrPERM{r.f.Name() + ": Truncate outside of a transaction"}
+	}
+
+	return r.bitFiler.Truncate(size)
+}
+
+// Implements Filer.
+func (r *RollbackFiler) WriteAt(b []byte, off int64) (n int, err error) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	if r.tlevel == 0 {
+		return 0, &ErrPERM{r.f.Name() + ": WriteAt outside of a transaction"}
+	}
+
+	return r.bitFiler.WriteAt(b, off)
+}
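
Putting the pieces together: wrapping a MemFiler in a RollbackFiler yields transactional memory, as the note at the top of this file suggests. A minimal sketch (not part of the vendored source) in which committed pages are written straight back to the MemFiler and the checkpoint callback simply adopts the new size via Truncate; a real database would typically route writerAt through a WAL or 2PC mechanism instead:

	package main

	import (
		"fmt"
		"io"
		"log"

		"github.com/cznic/lldb"
	)

	func main() {
		backing := lldb.NewMemFiler()

		r, err := lldb.NewRollbackFiler(
			backing,
			func(sz int64) error { return backing.Truncate(sz) }, // checkpoint
			backing, // writerAt: committed pages go straight to the MemFiler
		)
		if err != nil {
			log.Fatal(err)
		}

		if err := r.BeginUpdate(); err != nil {
			log.Fatal(err)
		}
		if _, err := r.WriteAt([]byte("committed"), 0); err != nil {
			log.Fatal(err)
		}
		if err := r.EndUpdate(); err != nil { // dumps dirty pages, then checkpoint
			log.Fatal(err)
		}

		b := make([]byte, 9)
		if _, err := r.ReadAt(b, 0); err != nil && err != io.EOF {
			log.Fatal(err)
		}
		fmt.Printf("%s\n", b) // committed
	}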

+ 25 - 0
vendor/github.com/edsrzf/mmap-go/LICENSE

@@ -0,0 +1,25 @@
+Copyright (c) 2011, Evan Shaw <[email protected]>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the copyright holder nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+

+ 112 - 0
vendor/github.com/edsrzf/mmap-go/mmap.go

@@ -0,0 +1,112 @@
+// Copyright 2011 Evan Shaw. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file defines the common package interface and contains a little bit of
+// factored out logic.
+
+// Package mmap allows mapping files into memory. It tries to provide a simple, reasonably portable interface,
+// but doesn't go out of its way to abstract away every little platform detail.
+// This specifically means:
+//	* forked processes may or may not inherit mappings
+//	* a file's timestamp may or may not be updated by writes through mappings
+//	* specifying a size larger than the file's actual size can increase the file's size
+//	* If the mapped file is being modified by another process while your program's running, don't expect consistent results between platforms
+package mmap
+
+import (
+	"errors"
+	"os"
+	"reflect"
+	"unsafe"
+)
+
+const (
+	// RDONLY maps the memory read-only.
+	// Attempts to write to the MMap object will result in undefined behavior.
+	RDONLY = 0
+	// RDWR maps the memory as read-write. Writes to the MMap object will update the
+	// underlying file.
+	RDWR = 1 << iota
+	// COPY maps the memory as copy-on-write. Writes to the MMap object will affect
+	// memory, but the underlying file will remain unchanged.
+	COPY
+	// If EXEC is set, the mapped memory is marked as executable.
+	EXEC
+)
+
+const (
+	// If the ANON flag is set, the mapped memory will not be backed by a file.
+	ANON = 1 << iota
+)
+
+// MMap represents a file mapped into memory.
+type MMap []byte
+
+// Map maps an entire file into memory.
+// If ANON is set in flags, f is ignored.
+func Map(f *os.File, prot, flags int) (MMap, error) {
+	return MapRegion(f, -1, prot, flags, 0)
+}
+
+// MapRegion maps part of a file into memory.
+// The offset parameter must be a multiple of the system's page size.
+// If length < 0, the entire file will be mapped.
+// If ANON is set in flags, f is ignored.
+func MapRegion(f *os.File, length int, prot, flags int, offset int64) (MMap, error) {
+	var fd uintptr
+	if flags&ANON == 0 {
+		fd = uintptr(f.Fd())
+		if length < 0 {
+			fi, err := f.Stat()
+			if err != nil {
+				return nil, err
+			}
+			length = int(fi.Size())
+		}
+	} else {
+		if length <= 0 {
+			return nil, errors.New("anonymous mapping requires non-zero length")
+		}
+		fd = ^uintptr(0)
+	}
+	return mmap(length, uintptr(prot), uintptr(flags), fd, offset)
+}
+
+func (m *MMap) header() *reflect.SliceHeader {
+	return (*reflect.SliceHeader)(unsafe.Pointer(m))
+}
+
+// Lock keeps the mapped region in physical memory, ensuring that it will not be
+// swapped out.
+func (m MMap) Lock() error {
+	dh := m.header()
+	return lock(dh.Data, uintptr(dh.Len))
+}
+
+// Unlock reverses the effect of Lock, allowing the mapped region to potentially
+// be swapped out.
+// If m is already unlocked, an error will result.
+func (m MMap) Unlock() error {
+	dh := m.header()
+	return unlock(dh.Data, uintptr(dh.Len))
+}
+
+// Flush synchronizes the mapping's contents to the file's contents on disk.
+func (m MMap) Flush() error {
+	dh := m.header()
+	return flush(dh.Data, uintptr(dh.Len))
+}
+
+// Unmap deletes the memory mapped region, flushes any remaining changes, and sets
+// m to nil.
+// Trying to read or write any remaining references to m after Unmap is called will
+// result in undefined behavior.
+// Unmap should only be called on the slice value that was originally returned from
+// a call to Map. Calling Unmap on a derived slice may cause errors.
+func (m *MMap) Unmap() error {
+	dh := m.header()
+	err := unmap(dh.Data, uintptr(dh.Len))
+	*m = nil
+	return err
+}
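
Typical use of the package maps an existing file read-write, mutates the returned byte slice in place, and flushes before unmapping. A minimal sketch (not part of the vendored source); it assumes "data.bin" already exists and is non-empty:

	package main

	import (
		"log"
		"os"

		mmap "github.com/edsrzf/mmap-go"
	)

	func main() {
		f, err := os.OpenFile("data.bin", os.O_RDWR, 0)
		if err != nil {
			log.Fatal(err)
		}
		defer f.Close()

		m, err := mmap.Map(f, mmap.RDWR, 0) // map the whole file read-write
		if err != nil {
			log.Fatal(err)
		}
		defer m.Unmap()

		m[0] = 0x42 // writes hit the mapping; Flush pushes them to the file

		if err := m.Flush(); err != nil {
			log.Fatal(err)
		}
	}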

+ 67 - 0
vendor/github.com/edsrzf/mmap-go/mmap_unix.go

@@ -0,0 +1,67 @@
+// Copyright 2011 Evan Shaw. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux openbsd solaris netbsd
+
+package mmap
+
+import (
+	"syscall"
+)
+
+func mmap(len int, inprot, inflags, fd uintptr, off int64) ([]byte, error) {
+	flags := syscall.MAP_SHARED
+	prot := syscall.PROT_READ
+	switch {
+	case inprot&COPY != 0:
+		prot |= syscall.PROT_WRITE
+		flags = syscall.MAP_PRIVATE
+	case inprot&RDWR != 0:
+		prot |= syscall.PROT_WRITE
+	}
+	if inprot&EXEC != 0 {
+		prot |= syscall.PROT_EXEC
+	}
+	if inflags&ANON != 0 {
+		flags |= syscall.MAP_ANON
+	}
+
+	b, err := syscall.Mmap(int(fd), off, len, prot, flags)
+	if err != nil {
+		return nil, err
+	}
+	return b, nil
+}
+
+func flush(addr, len uintptr) error {
+	_, _, errno := syscall.Syscall(_SYS_MSYNC, addr, len, _MS_SYNC)
+	if errno != 0 {
+		return syscall.Errno(errno)
+	}
+	return nil
+}
+
+func lock(addr, len uintptr) error {
+	_, _, errno := syscall.Syscall(syscall.SYS_MLOCK, addr, len, 0)
+	if errno != 0 {
+		return syscall.Errno(errno)
+	}
+	return nil
+}
+
+func unlock(addr, len uintptr) error {
+	_, _, errno := syscall.Syscall(syscall.SYS_MUNLOCK, addr, len, 0)
+	if errno != 0 {
+		return syscall.Errno(errno)
+	}
+	return nil
+}
+
+func unmap(addr, len uintptr) error {
+	_, _, errno := syscall.Syscall(syscall.SYS_MUNMAP, addr, len, 0)
+	if errno != 0 {
+		return syscall.Errno(errno)
+	}
+	return nil
+}

+ 125 - 0
vendor/github.com/edsrzf/mmap-go/mmap_windows.go

@@ -0,0 +1,125 @@
+// Copyright 2011 Evan Shaw. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package mmap
+
+import (
+	"errors"
+	"os"
+	"sync"
+	"syscall"
+)
+
+// mmap on Windows is a two-step process.
+// First, we call CreateFileMapping to get a handle.
+// Then, we call MapViewOfFile to get an actual pointer into memory.
+// Because we want to emulate a POSIX-style mmap, we don't want to expose
+// the handle -- only the pointer. We also want to return only a byte slice,
+// not a struct, so it's convenient to manipulate.
+
+// We keep this map so that we can get back the original handle from the memory address.
+var handleLock sync.Mutex
+var handleMap = map[uintptr]syscall.Handle{}
+
+func mmap(len int, prot, flags, hfile uintptr, off int64) ([]byte, error) {
+	flProtect := uint32(syscall.PAGE_READONLY)
+	dwDesiredAccess := uint32(syscall.FILE_MAP_READ)
+	switch {
+	case prot&COPY != 0:
+		flProtect = syscall.PAGE_WRITECOPY
+		dwDesiredAccess = syscall.FILE_MAP_COPY
+	case prot&RDWR != 0:
+		flProtect = syscall.PAGE_READWRITE
+		dwDesiredAccess = syscall.FILE_MAP_WRITE
+	}
+	if prot&EXEC != 0 {
+		flProtect <<= 4
+		dwDesiredAccess |= syscall.FILE_MAP_EXECUTE
+	}
+
+	// The maximum size is the area of the file, starting from 0,
+	// that we wish to allow to be mappable. It is the sum of
+	// the length the user requested, plus the offset where that length
+	// is starting from. This does not map the data into memory.
+	maxSizeHigh := uint32((off + int64(len)) >> 32)
+	maxSizeLow := uint32((off + int64(len)) & 0xFFFFFFFF)
+	// TODO: Do we need to set some security attributes? It might help portability.
+	h, errno := syscall.CreateFileMapping(syscall.Handle(hfile), nil, flProtect, maxSizeHigh, maxSizeLow, nil)
+	if h == 0 {
+		return nil, os.NewSyscallError("CreateFileMapping", errno)
+	}
+
+	// Actually map a view of the data into memory. The view's size
+	// is the length the user requested.
+	fileOffsetHigh := uint32(off >> 32)
+	fileOffsetLow := uint32(off & 0xFFFFFFFF)
+	addr, errno := syscall.MapViewOfFile(h, dwDesiredAccess, fileOffsetHigh, fileOffsetLow, uintptr(len))
+	if addr == 0 {
+		return nil, os.NewSyscallError("MapViewOfFile", errno)
+	}
+	handleLock.Lock()
+	handleMap[addr] = h
+	handleLock.Unlock()
+
+	m := MMap{}
+	dh := m.header()
+	dh.Data = addr
+	dh.Len = len
+	dh.Cap = dh.Len
+
+	return m, nil
+}
+
+func flush(addr, len uintptr) error {
+	errno := syscall.FlushViewOfFile(addr, len)
+	if errno != nil {
+		return os.NewSyscallError("FlushViewOfFile", errno)
+	}
+
+	handleLock.Lock()
+	defer handleLock.Unlock()
+	handle, ok := handleMap[addr]
+	if !ok {
+		// should be impossible; we would've errored above
+		return errors.New("unknown base address")
+	}
+
+	errno = syscall.FlushFileBuffers(handle)
+	return os.NewSyscallError("FlushFileBuffers", errno)
+}
+
+func lock(addr, len uintptr) error {
+	errno := syscall.VirtualLock(addr, len)
+	return os.NewSyscallError("VirtualLock", errno)
+}
+
+func unlock(addr, len uintptr) error {
+	errno := syscall.VirtualUnlock(addr, len)
+	return os.NewSyscallError("VirtualUnlock", errno)
+}
+
+func unmap(addr, len uintptr) error {
+	flush(addr, len)
+	// Lock the UnmapViewOfFile along with the handleMap deletion.
+	// As soon as we unmap the view, the OS is free to give the
+	// same addr to another new map. We don't want another goroutine
+	// to insert and remove the same addr into handleMap while
+	// we're trying to remove our old addr/handle pair.
+	handleLock.Lock()
+	defer handleLock.Unlock()
+	err := syscall.UnmapViewOfFile(addr)
+	if err != nil {
+		return err
+	}
+
+	handle, ok := handleMap[addr]
+	if !ok {
+		// should be impossible; we would've errored above
+		return errors.New("unknown base address")
+	}
+	delete(handleMap, addr)
+
+	e := syscall.CloseHandle(syscall.Handle(handle))
+	return os.NewSyscallError("CloseHandle", e)
+}

+ 8 - 0
vendor/github.com/edsrzf/mmap-go/msync_netbsd.go

@@ -0,0 +1,8 @@
+// Copyright 2011 Evan Shaw. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package mmap
+
+const _SYS_MSYNC = 277
+const _MS_SYNC = 0x04

+ 14 - 0
vendor/github.com/edsrzf/mmap-go/msync_unix.go

@@ -0,0 +1,14 @@
+// Copyright 2011 Evan Shaw. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux openbsd solaris
+
+package mmap
+
+import (
+	"syscall"
+)
+
+const _SYS_MSYNC = syscall.SYS_MSYNC
+const _MS_SYNC = syscall.MS_SYNC

+ 38 - 0
vendor/manifest

@@ -52,6 +52,37 @@
 			"branch": "master",
 			"notests": true
 		},
+		{
+			"importpath": "github.com/cznic/internal/buffer",
+			"repository": "https://github.com/cznic/internal",
+			"revision": "cef02a853c3a93623c42eacd574e7ea05f55531b",
+			"branch": "master",
+			"path": "/buffer",
+			"notests": true
+		},
+		{
+			"importpath": "github.com/cznic/internal/file",
+			"repository": "https://github.com/cznic/internal",
+			"revision": "cef02a853c3a93623c42eacd574e7ea05f55531b",
+			"branch": "master",
+			"path": "/file",
+			"notests": true
+		},
+		{
+			"importpath": "github.com/cznic/internal/slice",
+			"repository": "https://github.com/cznic/internal",
+			"revision": "cef02a853c3a93623c42eacd574e7ea05f55531b",
+			"branch": "master",
+			"path": "/slice",
+			"notests": true
+		},
+		{
+			"importpath": "github.com/cznic/lldb",
+			"repository": "https://github.com/cznic/lldb",
+			"revision": "7376b3bed3d27a7b640e264bfaf278d6d5232550",
+			"branch": "master",
+			"notests": true
+		},
 		{
 			"importpath": "github.com/cznic/mathutil",
 			"repository": "https://github.com/cznic/mathutil",
@@ -93,6 +124,13 @@
 			"revision": "bf29d7cd9038386a5b4a22e2d73c8fb20ae14602",
 			"branch": "master"
 		},
+		{
+			"importpath": "github.com/edsrzf/mmap-go",
+			"repository": "https://github.com/edsrzf/mmap-go",
+			"revision": "935e0e8a636ca4ba70b713f3e38a19e1b77739e8",
+			"branch": "master",
+			"notests": true
+		},
 		{
 			"importpath": "github.com/gobwas/glob",
 			"repository": "https://github.com/gobwas/glob",