@@ -23,11 +23,26 @@ import (
 
 const (
 	dbMaxOpenFiles = 100
-	dbWriteBuffer  = 16 << 20
+	dbFlushBatch   = 4 << MiB
+
+	// A large database is > 200 MiB. It's a mostly arbitrary value, but
+	// it's also the case that each file is 2 MiB by default and when we
+	// have dbMaxOpenFiles of them we will need to start thrashing fd:s.
+	// Switching to large database settings causes larger files to be used
+	// when compacting, reducing the number.
+	dbLargeThreshold = dbMaxOpenFiles * (2 << MiB)
+
+	KiB = 10
+	MiB = 20
 )
 
-var (
-	dbFlushBatch = debugEnvValue("WriteBuffer", dbWriteBuffer) / 4 // Some leeway for any leveldb in-memory optimizations
+type Tuning int
+
+const (
+	// N.b. these constants must match those in lib/config.Tuning!
+	TuningAuto Tuning = iota
+	TuningSmall
+	TuningLarge
 )
 
 // Lowlevel is the lowest level database interface. It has a very simple
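Aside: KiB and MiB in this hunk are bit-shift amounts rather than byte counts, so a size is written as n << MiB, meaning n * 2^20 bytes. A minimal sketch of the threshold arithmetic, using only the constants introduced above:

	const (
		KiB = 10
		MiB = 20
	)

	// dbMaxOpenFiles * (2 << MiB) = 100 * 2 MiB = 200 MiB in bytes,
	// matching the "> 200 MiB" figure in the comment.
	const dbLargeThreshold = 100 * (2 << MiB) // 209715200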
@@ -49,18 +64,58 @@ type Lowlevel struct {
 // Open attempts to open the database at the given location, and runs
 // recovery on it if opening fails. Worst case, if recovery is not possible,
 // the database is erased and created from scratch.
-func Open(location string) (*Lowlevel, error) {
+func Open(location string, tuning Tuning) (*Lowlevel, error) {
+	opts := optsFor(location, tuning)
+	return open(location, opts)
+}
+
+// optsFor returns the database options to use when opening a database with
+// the given location and tuning. Settings can be overridden by debug
+// environment variables.
+func optsFor(location string, tuning Tuning) *opt.Options {
+	large := false
+	switch tuning {
+	case TuningLarge:
+		large = true
+	case TuningAuto:
+		large = dbIsLarge(location)
+	}
+
+	var (
+		// Set defaults used for small databases.
+		defaultBlockCacheCapacity = 0 // 0 means let leveldb use default
+		defaultBlockSize = 0
+		defaultCompactionTableSize = 0
+		defaultCompactionTableSizeMultiplier = 0
+		defaultWriteBuffer = 16 << MiB // increased from leveldb default of 4 MiB
+		defaultCompactionL0Trigger = opt.DefaultCompactionL0Trigger // explicit because we use it as base for other stuff
+	)
+
+	if large {
+		// Change the parameters for better throughput at the price of some
+		// RAM and larger files. This results in larger batches of writes
+		// and compaction at a lower frequency.
+		l.Infoln("Using large-database tuning")
+
+		defaultBlockCacheCapacity = 64 << MiB
+		defaultBlockSize = 64 << KiB
+		defaultCompactionTableSize = 16 << MiB
+		defaultCompactionTableSizeMultiplier = 20 // 2.0 after division by ten
+		defaultWriteBuffer = 64 << MiB
+		defaultCompactionL0Trigger = 8 // number of l0 files
+	}
+
 	opts := &opt.Options{
-		BlockCacheCapacity: debugEnvValue("BlockCacheCapacity", 0),
+		BlockCacheCapacity: debugEnvValue("BlockCacheCapacity", defaultBlockCacheCapacity),
 		BlockCacheEvictRemoved: debugEnvValue("BlockCacheEvictRemoved", 0) != 0,
 		BlockRestartInterval: debugEnvValue("BlockRestartInterval", 0),
-		BlockSize: debugEnvValue("BlockSize", 0),
+		BlockSize: debugEnvValue("BlockSize", defaultBlockSize),
 		CompactionExpandLimitFactor: debugEnvValue("CompactionExpandLimitFactor", 0),
 		CompactionGPOverlapsFactor: debugEnvValue("CompactionGPOverlapsFactor", 0),
-		CompactionL0Trigger: debugEnvValue("CompactionL0Trigger", 0),
+		CompactionL0Trigger: debugEnvValue("CompactionL0Trigger", defaultCompactionL0Trigger),
 		CompactionSourceLimitFactor: debugEnvValue("CompactionSourceLimitFactor", 0),
-		CompactionTableSize: debugEnvValue("CompactionTableSize", 0),
-		CompactionTableSizeMultiplier: float64(debugEnvValue("CompactionTableSizeMultiplier", 0)) / 10.0,
+		CompactionTableSize: debugEnvValue("CompactionTableSize", defaultCompactionTableSize),
+		CompactionTableSizeMultiplier: float64(debugEnvValue("CompactionTableSizeMultiplier", defaultCompactionTableSizeMultiplier)) / 10.0,
 		CompactionTotalSize: debugEnvValue("CompactionTotalSize", 0),
 		CompactionTotalSizeMultiplier: float64(debugEnvValue("CompactionTotalSizeMultiplier", 0)) / 10.0,
 		DisableBufferPool: debugEnvValue("DisableBufferPool", 0) != 0,
@@ -70,15 +125,16 @@ func Open(location string) (*Lowlevel, error) {
 		NoSync: debugEnvValue("NoSync", 0) != 0,
 		NoWriteMerge: debugEnvValue("NoWriteMerge", 0) != 0,
 		OpenFilesCacheCapacity: debugEnvValue("OpenFilesCacheCapacity", dbMaxOpenFiles),
-		WriteBuffer: debugEnvValue("WriteBuffer", dbWriteBuffer),
+		WriteBuffer: debugEnvValue("WriteBuffer", defaultWriteBuffer),
 		// The write slowdown and pause can be overridden, but even if they
 		// are not and the compaction trigger is overridden we need to
 		// adjust so that we don't pause writes for L0 compaction before we
 		// even *start* L0 compaction...
-		WriteL0SlowdownTrigger: debugEnvValue("WriteL0SlowdownTrigger", 2*debugEnvValue("CompactionL0Trigger", opt.DefaultCompactionL0Trigger)),
-		WriteL0PauseTrigger:    debugEnvValue("WriteL0SlowdownTrigger", 3*debugEnvValue("CompactionL0Trigger", opt.DefaultCompactionL0Trigger)),
+		WriteL0SlowdownTrigger: debugEnvValue("WriteL0SlowdownTrigger", 2*debugEnvValue("CompactionL0Trigger", defaultCompactionL0Trigger)),
+		WriteL0PauseTrigger: debugEnvValue("WriteL0PauseTrigger", 3*debugEnvValue("CompactionL0Trigger", defaultCompactionL0Trigger)),
 	}
-	return open(location, opts)
+
+	return opts
 }
 
 // OpenRO attempts to open the database at the given location, read only.
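With these defaults the write throttling follows the compaction trigger: writes slow down at 2x the L0 trigger and pause at 3x. At the stock opt.DefaultCompactionL0Trigger (4 in goleveldb) that is 8 and 12 L0 tables; with the large-database trigger of 8 it becomes 16 and 24. A caller-side sketch of the new signature, assuming this package is imported as db and with error handling kept minimal:

	ll, err := db.Open(location, db.TuningAuto)
	if err != nil {
		return err
	}
	defer ll.Close()

TuningAuto defers the choice to dbIsLarge, so an existing database only switches to the large profile once it has grown past the 200 MiB threshold.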
@@ -114,6 +170,7 @@ func open(location string, opts *opt.Options) (*Lowlevel, error) {
 			l.Warnln("Compacting database:", err)
 		}
 	}
+
 	return NewLowlevel(db, location), nil
 }
 
@@ -207,6 +264,32 @@ func (db *Lowlevel) Close() {
 	db.DB.Close()
 }
 
+// dbIsLarge returns whether the estimated size of the database at location
+// is large enough to warrant optimization for large databases.
+func dbIsLarge(location string) bool {
+	dir, err := os.Open(location)
+	if err != nil {
+		return false
+	}
+	defer dir.Close() // don't leak the directory handle
+
+	fis, err := dir.Readdir(-1)
+	if err != nil {
+		return false
+	}
+
+	var size int64
+	for _, fi := range fis {
+		if fi.Name() == "LOG" {
+			// don't count the size
+			continue
+		}
+		size += fi.Size()
+	}
+
+	return size > dbLargeThreshold
+}
+
 // NewLowlevel wraps the given *leveldb.DB into a *lowlevel
 func NewLowlevel(db *leveldb.DB, location string) *Lowlevel {
 	return &Lowlevel{