diff options
Diffstat (limited to 'Godeps/_workspace/src/github.com/syndtr/goleveldb')
81 files changed, 18440 insertions, 0 deletions
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch.go new file mode 100644 index 000000000..0d7911eca --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch.go @@ -0,0 +1,216 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "encoding/binary" + "errors" + + "github.com/syndtr/goleveldb/leveldb/memdb" +) + +var ( + errBatchTooShort = errors.New("leveldb: batch is too short") + errBatchBadRecord = errors.New("leveldb: bad record in batch") +) + +const kBatchHdrLen = 8 + 4 + +type batchReplay interface { + put(key, value []byte, seq uint64) + delete(key []byte, seq uint64) +} + +// Batch is a write batch. +type Batch struct { + buf []byte + rLen, bLen int + seq uint64 + sync bool +} + +func (b *Batch) grow(n int) { + off := len(b.buf) + if off == 0 { + // include headers + off = kBatchHdrLen + n += off + } + if cap(b.buf)-off >= n { + return + } + buf := make([]byte, 2*cap(b.buf)+n) + copy(buf, b.buf) + b.buf = buf[:off] +} + +func (b *Batch) appendRec(t vType, key, value []byte) { + n := 1 + binary.MaxVarintLen32 + len(key) + if t == tVal { + n += binary.MaxVarintLen32 + len(value) + } + b.grow(n) + off := len(b.buf) + buf := b.buf[:off+n] + buf[off] = byte(t) + off += 1 + off += binary.PutUvarint(buf[off:], uint64(len(key))) + copy(buf[off:], key) + off += len(key) + if t == tVal { + off += binary.PutUvarint(buf[off:], uint64(len(value))) + copy(buf[off:], value) + off += len(value) + } + b.buf = buf[:off] + b.rLen++ + // Include 8-byte ikey header + b.bLen += len(key) + len(value) + 8 +} + +// Put appends 'put operation' of the given key/value pair to the batch. +// It is safe to modify the contents of the argument after Put returns. +func (b *Batch) Put(key, value []byte) { + b.appendRec(tVal, key, value) +} + +// Delete appends 'delete operation' of the given key to the batch. +// It is safe to modify the contents of the argument after Delete returns. +func (b *Batch) Delete(key []byte) { + b.appendRec(tDel, key, nil) +} + +// Reset resets the batch. +func (b *Batch) Reset() { + b.buf = nil + b.seq = 0 + b.rLen = 0 + b.bLen = 0 + b.sync = false +} + +func (b *Batch) init(sync bool) { + b.sync = sync +} + +func (b *Batch) put(key, value []byte, seq uint64) { + if b.rLen == 0 { + b.seq = seq + } + b.Put(key, value) +} + +func (b *Batch) delete(key []byte, seq uint64) { + if b.rLen == 0 { + b.seq = seq + } + b.Delete(key) +} + +func (b *Batch) append(p *Batch) { + if p.rLen > 0 { + b.grow(len(p.buf) - kBatchHdrLen) + b.buf = append(b.buf, p.buf[kBatchHdrLen:]...) + b.rLen += p.rLen + } + if p.sync { + b.sync = true + } +} + +func (b *Batch) len() int { + return b.rLen +} + +func (b *Batch) size() int { + return b.bLen +} + +func (b *Batch) encode() []byte { + b.grow(0) + binary.LittleEndian.PutUint64(b.buf, b.seq) + binary.LittleEndian.PutUint32(b.buf[8:], uint32(b.rLen)) + + return b.buf +} + +func (b *Batch) decode(buf []byte) error { + if len(buf) < kBatchHdrLen { + return errBatchTooShort + } + + b.seq = binary.LittleEndian.Uint64(buf) + b.rLen = int(binary.LittleEndian.Uint32(buf[8:])) + // No need to be precise at this point, it won't be used anyway + b.bLen = len(buf) - kBatchHdrLen + b.buf = buf + + return nil +} + +func (b *Batch) decodeRec(f func(i int, t vType, key, value []byte)) error { + off := kBatchHdrLen + for i := 0; i < b.rLen; i++ { + if off >= len(b.buf) { + return errors.New("leveldb: invalid batch record length") + } + + t := vType(b.buf[off]) + if t > tVal { + return errors.New("leveldb: invalid batch record type in batch") + } + off += 1 + + x, n := binary.Uvarint(b.buf[off:]) + off += n + if n <= 0 || off+int(x) > len(b.buf) { + return errBatchBadRecord + } + key := b.buf[off : off+int(x)] + off += int(x) + + var value []byte + if t == tVal { + x, n := binary.Uvarint(b.buf[off:]) + off += n + if n <= 0 || off+int(x) > len(b.buf) { + return errBatchBadRecord + } + value = b.buf[off : off+int(x)] + off += int(x) + } + + f(i, t, key, value) + } + + return nil +} + +func (b *Batch) replay(to batchReplay) error { + return b.decodeRec(func(i int, t vType, key, value []byte) { + switch t { + case tVal: + to.put(key, value, b.seq+uint64(i)) + case tDel: + to.delete(key, b.seq+uint64(i)) + } + }) +} + +func (b *Batch) memReplay(to *memdb.DB) error { + return b.decodeRec(func(i int, t vType, key, value []byte) { + ikey := newIKey(key, b.seq+uint64(i), t) + to.Put(ikey, value) + }) +} + +func (b *Batch) revertMemReplay(to *memdb.DB) error { + return b.decodeRec(func(i int, t vType, key, value []byte) { + ikey := newIKey(key, b.seq+uint64(i), t) + to.Delete(ikey) + }) +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch_test.go new file mode 100644 index 000000000..19b749b8f --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch_test.go @@ -0,0 +1,120 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "bytes" + "testing" + + "github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/syndtr/goleveldb/leveldb/memdb" +) + +type tbRec struct { + t vType + key, value []byte +} + +type testBatch struct { + rec []*tbRec +} + +func (p *testBatch) put(key, value []byte, seq uint64) { + p.rec = append(p.rec, &tbRec{tVal, key, value}) +} + +func (p *testBatch) delete(key []byte, seq uint64) { + p.rec = append(p.rec, &tbRec{tDel, key, nil}) +} + +func compareBatch(t *testing.T, b1, b2 *Batch) { + if b1.seq != b2.seq { + t.Errorf("invalid seq number want %d, got %d", b1.seq, b2.seq) + } + if b1.len() != b2.len() { + t.Fatalf("invalid record length want %d, got %d", b1.len(), b2.len()) + } + p1, p2 := new(testBatch), new(testBatch) + err := b1.replay(p1) + if err != nil { + t.Fatal("error when replaying batch 1: ", err) + } + err = b2.replay(p2) + if err != nil { + t.Fatal("error when replaying batch 2: ", err) + } + for i := range p1.rec { + r1, r2 := p1.rec[i], p2.rec[i] + if r1.t != r2.t { + t.Errorf("invalid type on record '%d' want %d, got %d", i, r1.t, r2.t) + } + if !bytes.Equal(r1.key, r2.key) { + t.Errorf("invalid key on record '%d' want %s, got %s", i, string(r1.key), string(r2.key)) + } + if r1.t == tVal { + if !bytes.Equal(r1.value, r2.value) { + t.Errorf("invalid value on record '%d' want %s, got %s", i, string(r1.value), string(r2.value)) + } + } + } +} + +func TestBatch_EncodeDecode(t *testing.T) { + b1 := new(Batch) + b1.seq = 10009 + b1.Put([]byte("key1"), []byte("value1")) + b1.Put([]byte("key2"), []byte("value2")) + b1.Delete([]byte("key1")) + b1.Put([]byte("k"), []byte("")) + b1.Put([]byte("zzzzzzzzzzz"), []byte("zzzzzzzzzzzzzzzzzzzzzzzz")) + b1.Delete([]byte("key10000")) + b1.Delete([]byte("k")) + buf := b1.encode() + b2 := new(Batch) + err := b2.decode(buf) + if err != nil { + t.Error("error when decoding batch: ", err) + } + compareBatch(t, b1, b2) +} + +func TestBatch_Append(t *testing.T) { + b1 := new(Batch) + b1.seq = 10009 + b1.Put([]byte("key1"), []byte("value1")) + b1.Put([]byte("key2"), []byte("value2")) + b1.Delete([]byte("key1")) + b1.Put([]byte("foo"), []byte("foovalue")) + b1.Put([]byte("bar"), []byte("barvalue")) + b2a := new(Batch) + b2a.seq = 10009 + b2a.Put([]byte("key1"), []byte("value1")) + b2a.Put([]byte("key2"), []byte("value2")) + b2a.Delete([]byte("key1")) + b2b := new(Batch) + b2b.Put([]byte("foo"), []byte("foovalue")) + b2b.Put([]byte("bar"), []byte("barvalue")) + b2a.append(b2b) + compareBatch(t, b1, b2a) +} + +func TestBatch_Size(t *testing.T) { + b := new(Batch) + for i := 0; i < 2; i++ { + b.Put([]byte("key1"), []byte("value1")) + b.Put([]byte("key2"), []byte("value2")) + b.Delete([]byte("key1")) + b.Put([]byte("foo"), []byte("foovalue")) + b.Put([]byte("bar"), []byte("barvalue")) + mem := memdb.New(&iComparer{comparer.DefaultComparer}, 0) + b.memReplay(mem) + if b.size() != mem.Size() { + t.Errorf("invalid batch size calculation, want=%d got=%d", mem.Size(), b.size()) + } + b.Reset() + } +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/bench_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/bench_test.go new file mode 100644 index 000000000..ea6801a89 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/bench_test.go @@ -0,0 +1,461 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "bytes" + "fmt" + "math/rand" + "os" + "path/filepath" + "runtime" + "testing" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" +) + +func randomString(r *rand.Rand, n int) []byte { + b := new(bytes.Buffer) + for i := 0; i < n; i++ { + b.WriteByte(' ' + byte(r.Intn(95))) + } + return b.Bytes() +} + +func compressibleStr(r *rand.Rand, frac float32, n int) []byte { + nn := int(float32(n) * frac) + rb := randomString(r, nn) + b := make([]byte, 0, n+nn) + for len(b) < n { + b = append(b, rb...) + } + return b[:n] +} + +type valueGen struct { + src []byte + pos int +} + +func newValueGen(frac float32) *valueGen { + v := new(valueGen) + r := rand.New(rand.NewSource(301)) + v.src = make([]byte, 0, 1048576+100) + for len(v.src) < 1048576 { + v.src = append(v.src, compressibleStr(r, frac, 100)...) + } + return v +} + +func (v *valueGen) get(n int) []byte { + if v.pos+n > len(v.src) { + v.pos = 0 + } + v.pos += n + return v.src[v.pos-n : v.pos] +} + +var benchDB = filepath.Join(os.TempDir(), fmt.Sprintf("goleveldbbench-%d", os.Getuid())) + +type dbBench struct { + b *testing.B + stor storage.Storage + db *DB + + o *opt.Options + ro *opt.ReadOptions + wo *opt.WriteOptions + + keys, values [][]byte +} + +func openDBBench(b *testing.B, noCompress bool) *dbBench { + _, err := os.Stat(benchDB) + if err == nil { + err = os.RemoveAll(benchDB) + if err != nil { + b.Fatal("cannot remove old db: ", err) + } + } + + p := &dbBench{ + b: b, + o: &opt.Options{}, + ro: &opt.ReadOptions{}, + wo: &opt.WriteOptions{}, + } + p.stor, err = storage.OpenFile(benchDB) + if err != nil { + b.Fatal("cannot open stor: ", err) + } + if noCompress { + p.o.Compression = opt.NoCompression + } + + p.db, err = Open(p.stor, p.o) + if err != nil { + b.Fatal("cannot open db: ", err) + } + + runtime.GOMAXPROCS(runtime.NumCPU()) + return p +} + +func (p *dbBench) reopen() { + p.db.Close() + var err error + p.db, err = Open(p.stor, p.o) + if err != nil { + p.b.Fatal("Reopen: got error: ", err) + } +} + +func (p *dbBench) populate(n int) { + p.keys, p.values = make([][]byte, n), make([][]byte, n) + v := newValueGen(0.5) + for i := range p.keys { + p.keys[i], p.values[i] = []byte(fmt.Sprintf("%016d", i)), v.get(100) + } +} + +func (p *dbBench) randomize() { + m := len(p.keys) + times := m * 2 + r1, r2 := rand.New(rand.NewSource(0xdeadbeef)), rand.New(rand.NewSource(0xbeefface)) + for n := 0; n < times; n++ { + i, j := r1.Int()%m, r2.Int()%m + if i == j { + continue + } + p.keys[i], p.keys[j] = p.keys[j], p.keys[i] + p.values[i], p.values[j] = p.values[j], p.values[i] + } +} + +func (p *dbBench) writes(perBatch int) { + b := p.b + db := p.db + + n := len(p.keys) + m := n / perBatch + if n%perBatch > 0 { + m++ + } + batches := make([]Batch, m) + j := 0 + for i := range batches { + first := true + for ; j < n && ((j+1)%perBatch != 0 || first); j++ { + first = false + batches[i].Put(p.keys[j], p.values[j]) + } + } + runtime.GC() + + b.ResetTimer() + b.StartTimer() + for i := range batches { + err := db.Write(&(batches[i]), p.wo) + if err != nil { + b.Fatal("write failed: ", err) + } + } + b.StopTimer() + b.SetBytes(116) +} + +func (p *dbBench) drop() { + p.keys, p.values = nil, nil + runtime.GC() +} + +func (p *dbBench) puts() { + b := p.b + db := p.db + + b.ResetTimer() + b.StartTimer() + for i := range p.keys { + err := db.Put(p.keys[i], p.values[i], p.wo) + if err != nil { + b.Fatal("put failed: ", err) + } + } + b.StopTimer() + b.SetBytes(116) +} + +func (p *dbBench) fill() { + b := p.b + db := p.db + + perBatch := 10000 + batch := new(Batch) + for i, n := 0, len(p.keys); i < n; { + first := true + for ; i < n && ((i+1)%perBatch != 0 || first); i++ { + first = false + batch.Put(p.keys[i], p.values[i]) + } + err := db.Write(batch, p.wo) + if err != nil { + b.Fatal("write failed: ", err) + } + batch.Reset() + } +} + +func (p *dbBench) gets() { + b := p.b + db := p.db + + b.ResetTimer() + for i := range p.keys { + _, err := db.Get(p.keys[i], p.ro) + if err != nil { + b.Error("got error: ", err) + } + } + b.StopTimer() +} + +func (p *dbBench) seeks() { + b := p.b + + iter := p.newIter() + defer iter.Release() + b.ResetTimer() + for i := range p.keys { + if !iter.Seek(p.keys[i]) { + b.Error("value not found for: ", string(p.keys[i])) + } + } + b.StopTimer() +} + +func (p *dbBench) newIter() iterator.Iterator { + iter := p.db.NewIterator(nil, p.ro) + err := iter.Error() + if err != nil { + p.b.Fatal("cannot create iterator: ", err) + } + return iter +} + +func (p *dbBench) close() { + p.db.Close() + p.stor.Close() + os.RemoveAll(benchDB) + p.db = nil + p.keys = nil + p.values = nil + runtime.GC() + runtime.GOMAXPROCS(1) +} + +func BenchmarkDBWrite(b *testing.B) { + p := openDBBench(b, false) + p.populate(b.N) + p.writes(1) + p.close() +} + +func BenchmarkDBWriteBatch(b *testing.B) { + p := openDBBench(b, false) + p.populate(b.N) + p.writes(1000) + p.close() +} + +func BenchmarkDBWriteUncompressed(b *testing.B) { + p := openDBBench(b, true) + p.populate(b.N) + p.writes(1) + p.close() +} + +func BenchmarkDBWriteBatchUncompressed(b *testing.B) { + p := openDBBench(b, true) + p.populate(b.N) + p.writes(1000) + p.close() +} + +func BenchmarkDBWriteRandom(b *testing.B) { + p := openDBBench(b, false) + p.populate(b.N) + p.randomize() + p.writes(1) + p.close() +} + +func BenchmarkDBWriteRandomSync(b *testing.B) { + p := openDBBench(b, false) + p.wo.Sync = true + p.populate(b.N) + p.writes(1) + p.close() +} + +func BenchmarkDBOverwrite(b *testing.B) { + p := openDBBench(b, false) + p.populate(b.N) + p.writes(1) + p.writes(1) + p.close() +} + +func BenchmarkDBOverwriteRandom(b *testing.B) { + p := openDBBench(b, false) + p.populate(b.N) + p.writes(1) + p.randomize() + p.writes(1) + p.close() +} + +func BenchmarkDBPut(b *testing.B) { + p := openDBBench(b, false) + p.populate(b.N) + p.puts() + p.close() +} + +func BenchmarkDBRead(b *testing.B) { + p := openDBBench(b, false) + p.populate(b.N) + p.fill() + p.drop() + + iter := p.newIter() + b.ResetTimer() + for iter.Next() { + } + iter.Release() + b.StopTimer() + b.SetBytes(116) + p.close() +} + +func BenchmarkDBReadGC(b *testing.B) { + p := openDBBench(b, false) + p.populate(b.N) + p.fill() + + iter := p.newIter() + b.ResetTimer() + for iter.Next() { + } + iter.Release() + b.StopTimer() + b.SetBytes(116) + p.close() +} + +func BenchmarkDBReadUncompressed(b *testing.B) { + p := openDBBench(b, true) + p.populate(b.N) + p.fill() + p.drop() + + iter := p.newIter() + b.ResetTimer() + for iter.Next() { + } + iter.Release() + b.StopTimer() + b.SetBytes(116) + p.close() +} + +func BenchmarkDBReadTable(b *testing.B) { + p := openDBBench(b, false) + p.populate(b.N) + p.fill() + p.reopen() + p.drop() + + iter := p.newIter() + b.ResetTimer() + for iter.Next() { + } + iter.Release() + b.StopTimer() + b.SetBytes(116) + p.close() +} + +func BenchmarkDBReadReverse(b *testing.B) { + p := openDBBench(b, false) + p.populate(b.N) + p.fill() + p.drop() + + iter := p.newIter() + b.ResetTimer() + iter.Last() + for iter.Prev() { + } + iter.Release() + b.StopTimer() + b.SetBytes(116) + p.close() +} + +func BenchmarkDBReadReverseTable(b *testing.B) { + p := openDBBench(b, false) + p.populate(b.N) + p.fill() + p.reopen() + p.drop() + + iter := p.newIter() + b.ResetTimer() + iter.Last() + for iter.Prev() { + } + iter.Release() + b.StopTimer() + b.SetBytes(116) + p.close() +} + +func BenchmarkDBSeek(b *testing.B) { + p := openDBBench(b, false) + p.populate(b.N) + p.fill() + p.seeks() + p.close() +} + +func BenchmarkDBSeekRandom(b *testing.B) { + p := openDBBench(b, false) + p.populate(b.N) + p.fill() + p.randomize() + p.seeks() + p.close() +} + +func BenchmarkDBGet(b *testing.B) { + p := openDBBench(b, false) + p.populate(b.N) + p.fill() + p.gets() + p.close() +} + +func BenchmarkDBGetRandom(b *testing.B) { + p := openDBBench(b, false) + p.populate(b.N) + p.fill() + p.randomize() + p.gets() + p.close() +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache.go new file mode 100644 index 000000000..9b6a74977 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache.go @@ -0,0 +1,125 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package cache provides interface and implementation of a cache algorithms. +package cache + +import ( + "sync/atomic" +) + +// SetFunc used by Namespace.Get method to create a cache object. SetFunc +// may return ok false, in that case the cache object will not be created. +type SetFunc func() (ok bool, value interface{}, charge int, fin SetFin) + +// SetFin will be called when corresponding cache object are released. +type SetFin func() + +// DelFin will be called when corresponding cache object are released. +// DelFin will be called after SetFin. The exist is true if the corresponding +// cache object is actually exist in the cache tree. +type DelFin func(exist bool) + +// PurgeFin will be called when corresponding cache object are released. +// PurgeFin will be called after SetFin. If PurgeFin present DelFin will +// not be executed but passed to the PurgeFin, it is up to the caller +// to call it or not. +type PurgeFin func(ns, key uint64, delfin DelFin) + +// Cache is a cache tree. +type Cache interface { + // SetCapacity sets cache capacity. + SetCapacity(capacity int) + + // GetNamespace gets or creates a cache namespace for the given id. + GetNamespace(id uint64) Namespace + + // Purge purges all cache namespaces, read Namespace.Purge method documentation. + Purge(fin PurgeFin) + + // Zap zaps all cache namespaces, read Namespace.Zap method documentation. + Zap(closed bool) +} + +// Namespace is a cache namespace. +type Namespace interface { + // Get gets cache object for the given key. The given SetFunc (if not nil) will + // be called if the given key does not exist. + // If the given key does not exist, SetFunc is nil or SetFunc return ok false, Get + // will return ok false. + Get(key uint64, setf SetFunc) (obj Object, ok bool) + + // Get deletes cache object for the given key. If exist the cache object will + // be deleted later when all of its handles have been released (i.e. no one use + // it anymore) and the given DelFin (if not nil) will finally be executed. If + // such cache object does not exist the given DelFin will be executed anyway. + // + // Delete returns true if such cache object exist. + Delete(key uint64, fin DelFin) bool + + // Purge deletes all cache objects, read Delete method documentation. + Purge(fin PurgeFin) + + // Zap detaches the namespace from the cache tree and delete all its cache + // objects. The cache objects deletion and finalizers execution are happen + // immediately, even if its existing handles haven't yet been released. + // A zapped namespace can't never be filled again. + // If closed is false then the Get function will always call the given SetFunc + // if it is not nil, but resultant of the SetFunc will not be cached. + Zap(closed bool) +} + +// Object is a cache object. +type Object interface { + // Release releases the cache object. Other methods should not be called + // after the cache object has been released. + Release() + + // Value returns value of the cache object. + Value() interface{} +} + +// Namespace state. +type nsState int + +const ( + nsEffective nsState = iota + nsZapped + nsClosed +) + +// Node state. +type nodeState int + +const ( + nodeEffective nodeState = iota + nodeEvicted + nodeRemoved +) + +// Fake object. +type fakeObject struct { + value interface{} + fin func() + once uint32 +} + +func (o *fakeObject) Value() interface{} { + if atomic.LoadUint32(&o.once) == 0 { + return o.value + } + return nil +} + +func (o *fakeObject) Release() { + if !atomic.CompareAndSwapUint32(&o.once, 0, 1) { + return + } + if o.fin != nil { + o.fin() + o.fin = nil + } +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache_test.go new file mode 100644 index 000000000..07a9939b2 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache_test.go @@ -0,0 +1,236 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package cache + +import ( + "math/rand" + "testing" +) + +func set(ns Namespace, key uint64, value interface{}, charge int, fin func()) Object { + obj, _ := ns.Get(key, func() (bool, interface{}, int, SetFin) { + return true, value, charge, fin + }) + return obj +} + +func TestCache_HitMiss(t *testing.T) { + cases := []struct { + key uint64 + value string + }{ + {1, "vvvvvvvvv"}, + {100, "v1"}, + {0, "v2"}, + {12346, "v3"}, + {777, "v4"}, + {999, "v5"}, + {7654, "v6"}, + {2, "v7"}, + {3, "v8"}, + {9, "v9"}, + } + + setfin := 0 + c := NewLRUCache(1000) + ns := c.GetNamespace(0) + for i, x := range cases { + set(ns, x.key, x.value, len(x.value), func() { + setfin++ + }).Release() + for j, y := range cases { + r, ok := ns.Get(y.key, nil) + if j <= i { + // should hit + if !ok { + t.Errorf("case '%d' iteration '%d' is miss", i, j) + } else if r.Value().(string) != y.value { + t.Errorf("case '%d' iteration '%d' has invalid value got '%s', want '%s'", i, j, r.Value().(string), y.value) + } + } else { + // should miss + if ok { + t.Errorf("case '%d' iteration '%d' is hit , value '%s'", i, j, r.Value().(string)) + } + } + if ok { + r.Release() + } + } + } + + for i, x := range cases { + finalizerOk := false + ns.Delete(x.key, func(exist bool) { + finalizerOk = true + }) + + if !finalizerOk { + t.Errorf("case %d delete finalizer not executed", i) + } + + for j, y := range cases { + r, ok := ns.Get(y.key, nil) + if j > i { + // should hit + if !ok { + t.Errorf("case '%d' iteration '%d' is miss", i, j) + } else if r.Value().(string) != y.value { + t.Errorf("case '%d' iteration '%d' has invalid value got '%s', want '%s'", i, j, r.Value().(string), y.value) + } + } else { + // should miss + if ok { + t.Errorf("case '%d' iteration '%d' is hit, value '%s'", i, j, r.Value().(string)) + } + } + if ok { + r.Release() + } + } + } + + if setfin != len(cases) { + t.Errorf("some set finalizer may not be executed, want=%d got=%d", len(cases), setfin) + } +} + +func TestLRUCache_Eviction(t *testing.T) { + c := NewLRUCache(12) + ns := c.GetNamespace(0) + o1 := set(ns, 1, 1, 1, nil) + set(ns, 2, 2, 1, nil).Release() + set(ns, 3, 3, 1, nil).Release() + set(ns, 4, 4, 1, nil).Release() + set(ns, 5, 5, 1, nil).Release() + if r, ok := ns.Get(2, nil); ok { // 1,3,4,5,2 + r.Release() + } + set(ns, 9, 9, 10, nil).Release() // 5,2,9 + + for _, x := range []uint64{9, 2, 5, 1} { + r, ok := ns.Get(x, nil) + if !ok { + t.Errorf("miss for key '%d'", x) + } else { + if r.Value().(int) != int(x) { + t.Errorf("invalid value for key '%d' want '%d', got '%d'", x, x, r.Value().(int)) + } + r.Release() + } + } + o1.Release() + for _, x := range []uint64{1, 2, 5} { + r, ok := ns.Get(x, nil) + if !ok { + t.Errorf("miss for key '%d'", x) + } else { + if r.Value().(int) != int(x) { + t.Errorf("invalid value for key '%d' want '%d', got '%d'", x, x, r.Value().(int)) + } + r.Release() + } + } + for _, x := range []uint64{3, 4, 9} { + r, ok := ns.Get(x, nil) + if ok { + t.Errorf("hit for key '%d'", x) + if r.Value().(int) != int(x) { + t.Errorf("invalid value for key '%d' want '%d', got '%d'", x, x, r.Value().(int)) + } + r.Release() + } + } +} + +func TestLRUCache_SetGet(t *testing.T) { + c := NewLRUCache(13) + ns := c.GetNamespace(0) + for i := 0; i < 200; i++ { + n := uint64(rand.Intn(99999) % 20) + set(ns, n, n, 1, nil).Release() + if p, ok := ns.Get(n, nil); ok { + if p.Value() == nil { + t.Errorf("key '%d' contains nil value", n) + } else { + got := p.Value().(uint64) + if got != n { + t.Errorf("invalid value for key '%d' want '%d', got '%d'", n, n, got) + } + } + p.Release() + } else { + t.Errorf("key '%d' doesn't exist", n) + } + } +} + +func TestLRUCache_Purge(t *testing.T) { + c := NewLRUCache(3) + ns1 := c.GetNamespace(0) + o1 := set(ns1, 1, 1, 1, nil) + o2 := set(ns1, 2, 2, 1, nil) + ns1.Purge(nil) + set(ns1, 3, 3, 1, nil).Release() + for _, x := range []uint64{1, 2, 3} { + r, ok := ns1.Get(x, nil) + if !ok { + t.Errorf("miss for key '%d'", x) + } else { + if r.Value().(int) != int(x) { + t.Errorf("invalid value for key '%d' want '%d', got '%d'", x, x, r.Value().(int)) + } + r.Release() + } + } + o1.Release() + o2.Release() + for _, x := range []uint64{1, 2} { + r, ok := ns1.Get(x, nil) + if ok { + t.Errorf("hit for key '%d'", x) + if r.Value().(int) != int(x) { + t.Errorf("invalid value for key '%d' want '%d', got '%d'", x, x, r.Value().(int)) + } + r.Release() + } + } +} + +func BenchmarkLRUCache_SetRelease(b *testing.B) { + capacity := b.N / 100 + if capacity <= 0 { + capacity = 10 + } + c := NewLRUCache(capacity) + ns := c.GetNamespace(0) + b.ResetTimer() + for i := uint64(0); i < uint64(b.N); i++ { + set(ns, i, nil, 1, nil).Release() + } +} + +func BenchmarkLRUCache_SetReleaseTwice(b *testing.B) { + capacity := b.N / 100 + if capacity <= 0 { + capacity = 10 + } + c := NewLRUCache(capacity) + ns := c.GetNamespace(0) + b.ResetTimer() + + na := b.N / 2 + nb := b.N - na + + for i := uint64(0); i < uint64(na); i++ { + set(ns, i, nil, 1, nil).Release() + } + + for i := uint64(0); i < uint64(nb); i++ { + set(ns, i, nil, 1, nil).Release() + } +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/empty_cache.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/empty_cache.go new file mode 100644 index 000000000..1fbf81459 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/empty_cache.go @@ -0,0 +1,246 @@ +// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package cache + +import ( + "sync" + "sync/atomic" +) + +type emptyCache struct { + sync.Mutex + table map[uint64]*emptyNS +} + +// NewEmptyCache creates a new initialized empty cache. +func NewEmptyCache() Cache { + return &emptyCache{ + table: make(map[uint64]*emptyNS), + } +} + +func (c *emptyCache) GetNamespace(id uint64) Namespace { + c.Lock() + defer c.Unlock() + + if ns, ok := c.table[id]; ok { + return ns + } + + ns := &emptyNS{ + cache: c, + id: id, + table: make(map[uint64]*emptyNode), + } + c.table[id] = ns + return ns +} + +func (c *emptyCache) Purge(fin PurgeFin) { + c.Lock() + for _, ns := range c.table { + ns.purgeNB(fin) + } + c.Unlock() +} + +func (c *emptyCache) Zap(closed bool) { + c.Lock() + for _, ns := range c.table { + ns.zapNB(closed) + } + c.table = make(map[uint64]*emptyNS) + c.Unlock() +} + +func (*emptyCache) SetCapacity(capacity int) {} + +type emptyNS struct { + cache *emptyCache + id uint64 + table map[uint64]*emptyNode + state nsState +} + +func (ns *emptyNS) Get(key uint64, setf SetFunc) (o Object, ok bool) { + ns.cache.Lock() + + switch ns.state { + case nsZapped: + ns.cache.Unlock() + if setf == nil { + return + } + + var value interface{} + var fin func() + ok, value, _, fin = setf() + if ok { + o = &fakeObject{ + value: value, + fin: fin, + } + } + return + case nsClosed: + ns.cache.Unlock() + return + } + + n, ok := ns.table[key] + if ok { + n.ref++ + } else { + if setf == nil { + ns.cache.Unlock() + return + } + + var value interface{} + var fin func() + ok, value, _, fin = setf() + if !ok { + ns.cache.Unlock() + return + } + + n = &emptyNode{ + ns: ns, + key: key, + value: value, + setfin: fin, + ref: 1, + } + ns.table[key] = n + } + + ns.cache.Unlock() + o = &emptyObject{node: n} + return +} + +func (ns *emptyNS) Delete(key uint64, fin DelFin) bool { + ns.cache.Lock() + + if ns.state != nsEffective { + ns.cache.Unlock() + if fin != nil { + fin(false) + } + return false + } + + n, ok := ns.table[key] + if !ok { + ns.cache.Unlock() + if fin != nil { + fin(false) + } + return false + } + n.delfin = fin + ns.cache.Unlock() + return true +} + +func (ns *emptyNS) purgeNB(fin PurgeFin) { + if ns.state != nsEffective { + return + } + for _, n := range ns.table { + n.purgefin = fin + } +} + +func (ns *emptyNS) Purge(fin PurgeFin) { + ns.cache.Lock() + ns.purgeNB(fin) + ns.cache.Unlock() +} + +func (ns *emptyNS) zapNB(closed bool) { + if ns.state != nsEffective { + return + } + for _, n := range ns.table { + n.execFin() + } + if closed { + ns.state = nsClosed + } else { + ns.state = nsZapped + } + ns.table = nil +} + +func (ns *emptyNS) Zap(closed bool) { + ns.cache.Lock() + ns.zapNB(closed) + delete(ns.cache.table, ns.id) + ns.cache.Unlock() +} + +type emptyNode struct { + ns *emptyNS + key uint64 + value interface{} + ref int + setfin SetFin + delfin DelFin + purgefin PurgeFin +} + +func (n *emptyNode) execFin() { + if n.setfin != nil { + n.setfin() + n.setfin = nil + } + if n.purgefin != nil { + n.purgefin(n.ns.id, n.key, n.delfin) + n.delfin = nil + n.purgefin = nil + } else if n.delfin != nil { + n.delfin(true) + n.delfin = nil + } +} + +func (n *emptyNode) evict() { + n.ns.cache.Lock() + n.ref-- + if n.ref == 0 { + if n.ns.state == nsEffective { + // Remove elem. + delete(n.ns.table, n.key) + // Execute finalizer. + n.execFin() + } + } else if n.ref < 0 { + panic("leveldb/cache: emptyNode: negative node reference") + } + n.ns.cache.Unlock() +} + +type emptyObject struct { + node *emptyNode + once uint32 +} + +func (o *emptyObject) Value() interface{} { + if atomic.LoadUint32(&o.once) == 0 { + return o.node.value + } + return nil +} + +func (o *emptyObject) Release() { + if !atomic.CompareAndSwapUint32(&o.once, 0, 1) { + return + } + o.node.evict() + o.node = nil +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/lru_cache.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/lru_cache.go new file mode 100644 index 000000000..3c98e076b --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/lru_cache.go @@ -0,0 +1,354 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package cache + +import ( + "sync" + "sync/atomic" +) + +// lruCache represent a LRU cache state. +type lruCache struct { + sync.Mutex + + recent lruNode + table map[uint64]*lruNs + capacity int + size int +} + +// NewLRUCache creates a new initialized LRU cache with the given capacity. +func NewLRUCache(capacity int) Cache { + c := &lruCache{ + table: make(map[uint64]*lruNs), + capacity: capacity, + } + c.recent.rNext = &c.recent + c.recent.rPrev = &c.recent + return c +} + +// SetCapacity set cache capacity. +func (c *lruCache) SetCapacity(capacity int) { + c.Lock() + c.capacity = capacity + c.evict() + c.Unlock() +} + +// GetNamespace return namespace object for given id. +func (c *lruCache) GetNamespace(id uint64) Namespace { + c.Lock() + defer c.Unlock() + + if p, ok := c.table[id]; ok { + return p + } + + p := &lruNs{ + lru: c, + id: id, + table: make(map[uint64]*lruNode), + } + c.table[id] = p + return p +} + +// Purge purge entire cache. +func (c *lruCache) Purge(fin PurgeFin) { + c.Lock() + for _, ns := range c.table { + ns.purgeNB(fin) + } + c.Unlock() +} + +func (c *lruCache) Zap(closed bool) { + c.Lock() + for _, ns := range c.table { + ns.zapNB(closed) + } + c.table = make(map[uint64]*lruNs) + c.Unlock() +} + +func (c *lruCache) evict() { + top := &c.recent + for n := c.recent.rPrev; c.size > c.capacity && n != top; { + n.state = nodeEvicted + n.rRemove() + n.evictNB() + c.size -= n.charge + n = c.recent.rPrev + } +} + +type lruNs struct { + lru *lruCache + id uint64 + table map[uint64]*lruNode + state nsState +} + +func (ns *lruNs) Get(key uint64, setf SetFunc) (o Object, ok bool) { + lru := ns.lru + lru.Lock() + + switch ns.state { + case nsZapped: + lru.Unlock() + if setf == nil { + return + } + + var value interface{} + var fin func() + ok, value, _, fin = setf() + if ok { + o = &fakeObject{ + value: value, + fin: fin, + } + } + return + case nsClosed: + lru.Unlock() + return + } + + n, ok := ns.table[key] + if ok { + switch n.state { + case nodeEvicted: + // Insert to recent list. + n.state = nodeEffective + n.ref++ + lru.size += n.charge + lru.evict() + fallthrough + case nodeEffective: + // Bump to front + n.rRemove() + n.rInsert(&lru.recent) + } + n.ref++ + } else { + if setf == nil { + lru.Unlock() + return + } + + var value interface{} + var charge int + var fin func() + ok, value, charge, fin = setf() + if !ok { + lru.Unlock() + return + } + + n = &lruNode{ + ns: ns, + key: key, + value: value, + charge: charge, + setfin: fin, + ref: 2, + } + ns.table[key] = n + n.rInsert(&lru.recent) + + lru.size += charge + lru.evict() + } + + lru.Unlock() + o = &lruObject{node: n} + return +} + +func (ns *lruNs) Delete(key uint64, fin DelFin) bool { + lru := ns.lru + lru.Lock() + + if ns.state != nsEffective { + lru.Unlock() + if fin != nil { + fin(false) + } + return false + } + + n, ok := ns.table[key] + if !ok { + lru.Unlock() + if fin != nil { + fin(false) + } + return false + } + + n.delfin = fin + switch n.state { + case nodeRemoved: + lru.Unlock() + return false + case nodeEffective: + lru.size -= n.charge + n.rRemove() + n.evictNB() + } + n.state = nodeRemoved + + lru.Unlock() + return true +} + +func (ns *lruNs) purgeNB(fin PurgeFin) { + lru := ns.lru + if ns.state != nsEffective { + return + } + + for _, n := range ns.table { + n.purgefin = fin + if n.state == nodeEffective { + lru.size -= n.charge + n.rRemove() + n.evictNB() + } + n.state = nodeRemoved + } +} + +func (ns *lruNs) Purge(fin PurgeFin) { + ns.lru.Lock() + ns.purgeNB(fin) + ns.lru.Unlock() +} + +func (ns *lruNs) zapNB(closed bool) { + lru := ns.lru + if ns.state != nsEffective { + return + } + + if closed { + ns.state = nsClosed + } else { + ns.state = nsZapped + } + for _, n := range ns.table { + if n.state == nodeEffective { + lru.size -= n.charge + n.rRemove() + } + n.state = nodeRemoved + n.execFin() + } + ns.table = nil +} + +func (ns *lruNs) Zap(closed bool) { + ns.lru.Lock() + ns.zapNB(closed) + delete(ns.lru.table, ns.id) + ns.lru.Unlock() +} + +type lruNode struct { + ns *lruNs + + rNext, rPrev *lruNode + + key uint64 + value interface{} + charge int + ref int + state nodeState + setfin SetFin + delfin DelFin + purgefin PurgeFin +} + +func (n *lruNode) rInsert(at *lruNode) { + x := at.rNext + at.rNext = n + n.rPrev = at + n.rNext = x + x.rPrev = n +} + +func (n *lruNode) rRemove() bool { + // only remove if not already removed + if n.rPrev == nil { + return false + } + + n.rPrev.rNext = n.rNext + n.rNext.rPrev = n.rPrev + n.rPrev = nil + n.rNext = nil + + return true +} + +func (n *lruNode) execFin() { + if n.setfin != nil { + n.setfin() + n.setfin = nil + } + if n.purgefin != nil { + n.purgefin(n.ns.id, n.key, n.delfin) + n.delfin = nil + n.purgefin = nil + } else if n.delfin != nil { + n.delfin(true) + n.delfin = nil + } +} + +func (n *lruNode) evictNB() { + n.ref-- + if n.ref == 0 { + if n.ns.state == nsEffective { + // remove elem + delete(n.ns.table, n.key) + // execute finalizer + n.execFin() + } + } else if n.ref < 0 { + panic("leveldb/cache: lruCache: negative node reference") + } +} + +func (n *lruNode) evict() { + n.ns.lru.Lock() + n.evictNB() + n.ns.lru.Unlock() +} + +type lruObject struct { + node *lruNode + once uint32 +} + +func (o *lruObject) Value() interface{} { + if atomic.LoadUint32(&o.once) == 0 { + return o.node.value + } + return nil +} + +func (o *lruObject) Release() { + if !atomic.CompareAndSwapUint32(&o.once, 0, 1) { + return + } + + o.node.evict() + o.node = nil +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer.go new file mode 100644 index 000000000..d33d5e9c7 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer.go @@ -0,0 +1,75 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import "github.com/syndtr/goleveldb/leveldb/comparer" + +type iComparer struct { + ucmp comparer.Comparer +} + +func (icmp *iComparer) uName() string { + return icmp.ucmp.Name() +} + +func (icmp *iComparer) uCompare(a, b []byte) int { + return icmp.ucmp.Compare(a, b) +} + +func (icmp *iComparer) uSeparator(dst, a, b []byte) []byte { + return icmp.ucmp.Separator(dst, a, b) +} + +func (icmp *iComparer) uSuccessor(dst, b []byte) []byte { + return icmp.ucmp.Successor(dst, b) +} + +func (icmp *iComparer) Name() string { + return icmp.uName() +} + +func (icmp *iComparer) Compare(a, b []byte) int { + x := icmp.ucmp.Compare(iKey(a).ukey(), iKey(b).ukey()) + if x == 0 { + if m, n := iKey(a).num(), iKey(b).num(); m > n { + x = -1 + } else if m < n { + x = 1 + } + } + return x +} + +func (icmp *iComparer) Separator(dst, a, b []byte) []byte { + ua, ub := iKey(a).ukey(), iKey(b).ukey() + dst = icmp.ucmp.Separator(dst, ua, ub) + if dst == nil { + return nil + } + if len(dst) < len(ua) && icmp.uCompare(ua, dst) < 0 { + dst = append(dst, kMaxNumBytes...) + } else { + // Did not close possibilities that n maybe longer than len(ub). + dst = append(dst, a[len(a)-8:]...) + } + return dst +} + +func (icmp *iComparer) Successor(dst, b []byte) []byte { + ub := iKey(b).ukey() + dst = icmp.ucmp.Successor(dst, ub) + if dst == nil { + return nil + } + if len(dst) < len(ub) && icmp.uCompare(ub, dst) < 0 { + dst = append(dst, kMaxNumBytes...) + } else { + // Did not close possibilities that n maybe longer than len(ub). + dst = append(dst, b[len(b)-8:]...) + } + return dst +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go new file mode 100644 index 000000000..14dddf88d --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go @@ -0,0 +1,51 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package comparer + +import "bytes" + +type bytesComparer struct{} + +func (bytesComparer) Compare(a, b []byte) int { + return bytes.Compare(a, b) +} + +func (bytesComparer) Name() string { + return "leveldb.BytewiseComparator" +} + +func (bytesComparer) Separator(dst, a, b []byte) []byte { + i, n := 0, len(a) + if n > len(b) { + n = len(b) + } + for ; i < n && a[i] == b[i]; i++ { + } + if i >= n { + // Do not shorten if one string is a prefix of the other + } else if c := a[i]; c < 0xff && c+1 < b[i] { + dst = append(dst, a[:i+1]...) + dst[i]++ + return dst + } + return nil +} + +func (bytesComparer) Successor(dst, b []byte) []byte { + for i, c := range b { + if c != 0xff { + dst = append(dst, b[:i+1]...) + dst[i]++ + return dst + } + } + return nil +} + +// DefaultComparer are default implementation of the Comparer interface. +// It uses the natural ordering, consistent with bytes.Compare. +var DefaultComparer = bytesComparer{} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go new file mode 100644 index 000000000..14a28f16f --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go @@ -0,0 +1,57 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package comparer provides interface and implementation for ordering +// sets of data. +package comparer + +// BasicComparer is the interface that wraps the basic Compare method. +type BasicComparer interface { + // Compare returns -1, 0, or +1 depending on whether a is 'less than', + // 'equal to' or 'greater than' b. The two arguments can only be 'equal' + // if their contents are exactly equal. Furthermore, the empty slice + // must be 'less than' any non-empty slice. + Compare(a, b []byte) int +} + +// Comparer defines a total ordering over the space of []byte keys: a 'less +// than' relationship. +type Comparer interface { + BasicComparer + + // Name returns name of the comparer. + // + // The Level-DB on-disk format stores the comparer name, and opening a + // database with a different comparer from the one it was created with + // will result in an error. + // + // An implementation to a new name whenever the comparer implementation + // changes in a way that will cause the relative ordering of any two keys + // to change. + // + // Names starting with "leveldb." are reserved and should not be used + // by any users of this package. + Name() string + + // Bellow are advanced functions used used to reduce the space requirements + // for internal data structures such as index blocks. + + // Separator appends a sequence of bytes x to dst such that a <= x && x < b, + // where 'less than' is consistent with Compare. An implementation should + // return nil if x equal to a. + // + // Either contents of a or b should not by any means modified. Doing so + // may cause corruption on the internal state. + Separator(dst, a, b []byte) []byte + + // Successor appends a sequence of bytes x to dst such that x >= b, where + // 'less than' is consistent with Compare. An implementation should return + // nil if x equal to b. + // + // Contents of b should not by any means modified. Doing so may cause + // corruption on the internal state. + Successor(dst, b []byte) []byte +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/config.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/config.go new file mode 100644 index 000000000..511058897 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/config.go @@ -0,0 +1,40 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +const ( + kNumLevels = 7 + + // Level-0 compaction is started when we hit this many files. + kL0_CompactionTrigger float64 = 4 + + // Soft limit on number of level-0 files. We slow down writes at this point. + kL0_SlowdownWritesTrigger = 8 + + // Maximum number of level-0 files. We stop writes at this point. + kL0_StopWritesTrigger = 12 + + // Maximum level to which a new compacted memdb is pushed if it + // does not create overlap. We try to push to level 2 to avoid the + // relatively expensive level 0=>1 compactions and to avoid some + // expensive manifest file operations. We do not push all the way to + // the largest level since that can generate a lot of wasted disk + // space if the same key space is being repeatedly overwritten. + kMaxMemCompactLevel = 2 + + // Maximum size of a table. + kMaxTableSize = 2 * 1048576 + + // Maximum bytes of overlaps in grandparent (i.e., level+2) before we + // stop building a single file in a level->level+1 compaction. + kMaxGrandParentOverlapBytes = 10 * kMaxTableSize + + // Maximum number of bytes in all compacted files. We avoid expanding + // the lower level file set of a compaction if it would make the + // total compaction cover more than this many bytes. + kExpCompactionMaxBytes = 25 * kMaxTableSize +) diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/corrupt_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/corrupt_test.go new file mode 100644 index 000000000..a036e0893 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/corrupt_test.go @@ -0,0 +1,472 @@ +// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "bytes" + "fmt" + "io" + "math/rand" + "testing" + + "github.com/syndtr/goleveldb/leveldb/cache" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" +) + +const ctValSize = 1000 + +type dbCorruptHarness struct { + dbHarness +} + +func newDbCorruptHarnessWopt(t *testing.T, o *opt.Options) *dbCorruptHarness { + h := new(dbCorruptHarness) + h.init(t, o) + return h +} + +func newDbCorruptHarness(t *testing.T) *dbCorruptHarness { + return newDbCorruptHarnessWopt(t, &opt.Options{ + BlockCache: cache.NewLRUCache(100), + Strict: opt.StrictJournalChecksum, + }) +} + +func (h *dbCorruptHarness) recover() { + p := &h.dbHarness + t := p.t + + var err error + p.db, err = Recover(h.stor, h.o) + if err != nil { + t.Fatal("Repair: got error: ", err) + } +} + +func (h *dbCorruptHarness) build(n int) { + p := &h.dbHarness + t := p.t + db := p.db + + batch := new(Batch) + for i := 0; i < n; i++ { + batch.Reset() + batch.Put(tkey(i), tval(i, ctValSize)) + err := db.Write(batch, p.wo) + if err != nil { + t.Fatal("write error: ", err) + } + } +} + +func (h *dbCorruptHarness) buildShuffled(n int, rnd *rand.Rand) { + p := &h.dbHarness + t := p.t + db := p.db + + batch := new(Batch) + for i := range rnd.Perm(n) { + batch.Reset() + batch.Put(tkey(i), tval(i, ctValSize)) + err := db.Write(batch, p.wo) + if err != nil { + t.Fatal("write error: ", err) + } + } +} + +func (h *dbCorruptHarness) deleteRand(n, max int, rnd *rand.Rand) { + p := &h.dbHarness + t := p.t + db := p.db + + batch := new(Batch) + for i := 0; i < n; i++ { + batch.Reset() + batch.Delete(tkey(rnd.Intn(max))) + err := db.Write(batch, p.wo) + if err != nil { + t.Fatal("write error: ", err) + } + } +} + +func (h *dbCorruptHarness) corrupt(ft storage.FileType, offset, n int) { + p := &h.dbHarness + t := p.t + + var file storage.File + ff, _ := p.stor.GetFiles(ft) + for _, f := range ff { + if file == nil || f.Num() > file.Num() { + file = f + } + } + if file == nil { + t.Fatalf("no such file with type %q", ft) + } + + r, err := file.Open() + if err != nil { + t.Fatal("cannot open file: ", err) + } + x, err := r.Seek(0, 2) + if err != nil { + t.Fatal("cannot query file size: ", err) + } + m := int(x) + if _, err := r.Seek(0, 0); err != nil { + t.Fatal(err) + } + + if offset < 0 { + if -offset > m { + offset = 0 + } else { + offset = m + offset + } + } + if offset > m { + offset = m + } + if offset+n > m { + n = m - offset + } + + buf := make([]byte, m) + _, err = io.ReadFull(r, buf) + if err != nil { + t.Fatal("cannot read file: ", err) + } + r.Close() + + for i := 0; i < n; i++ { + buf[offset+i] ^= 0x80 + } + + err = file.Remove() + if err != nil { + t.Fatal("cannot remove old file: ", err) + } + w, err := file.Create() + if err != nil { + t.Fatal("cannot create new file: ", err) + } + _, err = w.Write(buf) + if err != nil { + t.Fatal("cannot write new file: ", err) + } + w.Close() +} + +func (h *dbCorruptHarness) removeAll(ft storage.FileType) { + ff, err := h.stor.GetFiles(ft) + if err != nil { + h.t.Fatal("get files: ", err) + } + for _, f := range ff { + if err := f.Remove(); err != nil { + h.t.Error("remove file: ", err) + } + } +} + +func (h *dbCorruptHarness) removeOne(ft storage.FileType) { + ff, err := h.stor.GetFiles(ft) + if err != nil { + h.t.Fatal("get files: ", err) + } + f := ff[rand.Intn(len(ff))] + h.t.Logf("removing file @%d", f.Num()) + if err := f.Remove(); err != nil { + h.t.Error("remove file: ", err) + } +} + +func (h *dbCorruptHarness) check(min, max int) { + p := &h.dbHarness + t := p.t + db := p.db + + var n, badk, badv, missed, good int + iter := db.NewIterator(nil, p.ro) + for iter.Next() { + k := 0 + fmt.Sscanf(string(iter.Key()), "%d", &k) + if k < n { + badk++ + continue + } + missed += k - n + n = k + 1 + if !bytes.Equal(iter.Value(), tval(k, ctValSize)) { + badv++ + } else { + good++ + } + } + err := iter.Error() + iter.Release() + t.Logf("want=%d..%d got=%d badkeys=%d badvalues=%d missed=%d, err=%v", + min, max, good, badk, badv, missed, err) + if good < min || good > max { + t.Errorf("good entries number not in range") + } +} + +func TestCorruptDB_Journal(t *testing.T) { + h := newDbCorruptHarness(t) + + h.build(100) + h.check(100, 100) + h.closeDB() + h.corrupt(storage.TypeJournal, 19, 1) + h.corrupt(storage.TypeJournal, 32*1024+1000, 1) + + h.openDB() + h.check(36, 36) + + h.close() +} + +func TestCorruptDB_Table(t *testing.T) { + h := newDbCorruptHarness(t) + + h.build(100) + h.compactMem() + h.compactRangeAt(0, "", "") + h.compactRangeAt(1, "", "") + h.closeDB() + h.corrupt(storage.TypeTable, 100, 1) + + h.openDB() + h.check(99, 99) + + h.close() +} + +func TestCorruptDB_TableIndex(t *testing.T) { + h := newDbCorruptHarness(t) + + h.build(10000) + h.compactMem() + h.closeDB() + h.corrupt(storage.TypeTable, -2000, 500) + + h.openDB() + h.check(5000, 9999) + + h.close() +} + +func TestCorruptDB_MissingManifest(t *testing.T) { + rnd := rand.New(rand.NewSource(0x0badda7a)) + h := newDbCorruptHarnessWopt(t, &opt.Options{ + BlockCache: cache.NewLRUCache(100), + Strict: opt.StrictJournalChecksum, + WriteBuffer: 1000 * 60, + }) + + h.build(1000) + h.compactMem() + h.buildShuffled(1000, rnd) + h.compactMem() + h.deleteRand(500, 1000, rnd) + h.compactMem() + h.buildShuffled(1000, rnd) + h.compactMem() + h.deleteRand(500, 1000, rnd) + h.compactMem() + h.buildShuffled(1000, rnd) + h.compactMem() + h.closeDB() + + h.stor.SetIgnoreOpenErr(storage.TypeManifest) + h.removeAll(storage.TypeManifest) + h.openAssert(false) + h.stor.SetIgnoreOpenErr(0) + + h.recover() + h.check(1000, 1000) + h.build(1000) + h.compactMem() + h.compactRange("", "") + h.closeDB() + + h.recover() + h.check(1000, 1000) + + h.close() +} + +func TestCorruptDB_SequenceNumberRecovery(t *testing.T) { + h := newDbCorruptHarness(t) + + h.put("foo", "v1") + h.put("foo", "v2") + h.put("foo", "v3") + h.put("foo", "v4") + h.put("foo", "v5") + h.closeDB() + + h.recover() + h.getVal("foo", "v5") + h.put("foo", "v6") + h.getVal("foo", "v6") + + h.reopenDB() + h.getVal("foo", "v6") + + h.close() +} + +func TestCorruptDB_SequenceNumberRecoveryTable(t *testing.T) { + h := newDbCorruptHarness(t) + + h.put("foo", "v1") + h.put("foo", "v2") + h.put("foo", "v3") + h.compactMem() + h.put("foo", "v4") + h.put("foo", "v5") + h.compactMem() + h.closeDB() + + h.recover() + h.getVal("foo", "v5") + h.put("foo", "v6") + h.getVal("foo", "v6") + + h.reopenDB() + h.getVal("foo", "v6") + + h.close() +} + +func TestCorruptDB_CorruptedManifest(t *testing.T) { + h := newDbCorruptHarness(t) + + h.put("foo", "hello") + h.compactMem() + h.compactRange("", "") + h.closeDB() + h.corrupt(storage.TypeManifest, 0, 1000) + h.openAssert(false) + + h.recover() + h.getVal("foo", "hello") + + h.close() +} + +func TestCorruptDB_CompactionInputError(t *testing.T) { + h := newDbCorruptHarness(t) + + h.build(10) + h.compactMem() + h.closeDB() + h.corrupt(storage.TypeTable, 100, 1) + + h.openDB() + h.check(9, 9) + + h.build(10000) + h.check(10000, 10000) + + h.close() +} + +func TestCorruptDB_UnrelatedKeys(t *testing.T) { + h := newDbCorruptHarness(t) + + h.build(10) + h.compactMem() + h.closeDB() + h.corrupt(storage.TypeTable, 100, 1) + + h.openDB() + h.put(string(tkey(1000)), string(tval(1000, ctValSize))) + h.getVal(string(tkey(1000)), string(tval(1000, ctValSize))) + h.compactMem() + h.getVal(string(tkey(1000)), string(tval(1000, ctValSize))) + + h.close() +} + +func TestCorruptDB_Level0NewerFileHasOlderSeqnum(t *testing.T) { + h := newDbCorruptHarness(t) + + h.put("a", "v1") + h.put("b", "v1") + h.compactMem() + h.put("a", "v2") + h.put("b", "v2") + h.compactMem() + h.put("a", "v3") + h.put("b", "v3") + h.compactMem() + h.put("c", "v0") + h.put("d", "v0") + h.compactMem() + h.compactRangeAt(1, "", "") + h.closeDB() + + h.recover() + h.getVal("a", "v3") + h.getVal("b", "v3") + h.getVal("c", "v0") + h.getVal("d", "v0") + + h.close() +} + +func TestCorruptDB_RecoverInvalidSeq_Issue53(t *testing.T) { + h := newDbCorruptHarness(t) + + h.put("a", "v1") + h.put("b", "v1") + h.compactMem() + h.put("a", "v2") + h.put("b", "v2") + h.compactMem() + h.put("a", "v3") + h.put("b", "v3") + h.compactMem() + h.put("c", "v0") + h.put("d", "v0") + h.compactMem() + h.compactRangeAt(0, "", "") + h.closeDB() + + h.recover() + h.getVal("a", "v3") + h.getVal("b", "v3") + h.getVal("c", "v0") + h.getVal("d", "v0") + + h.close() +} + +func TestCorruptDB_MissingTableFiles(t *testing.T) { + h := newDbCorruptHarness(t) + + h.put("a", "v1") + h.put("b", "v1") + h.compactMem() + h.put("c", "v2") + h.put("d", "v2") + h.compactMem() + h.put("e", "v3") + h.put("f", "v3") + h.closeDB() + + h.removeOne(storage.TypeTable) + h.openAssert(false) + + h.close() +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db.go new file mode 100644 index 000000000..8e975dc63 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db.go @@ -0,0 +1,755 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "errors" + "fmt" + "io" + "os" + "runtime" + "strings" + "sync" + "time" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/journal" + "github.com/syndtr/goleveldb/leveldb/memdb" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" + "github.com/syndtr/goleveldb/leveldb/table" + "github.com/syndtr/goleveldb/leveldb/util" +) + +// DB is a LevelDB database. +type DB struct { + // Need 64-bit alignment. + seq uint64 + + s *session + + // MemDB + memMu sync.RWMutex + mem *memdb.DB + frozenMem *memdb.DB + journal *journal.Writer + journalWriter storage.Writer + journalFile storage.File + frozenJournalFile storage.File + frozenSeq uint64 + + // Snapshot + snapsMu sync.Mutex + snapsRoot snapshotElement + + // Write + writeC chan *Batch + writeMergedC chan bool + writeLockC chan struct{} + writeAckC chan error + journalC chan *Batch + journalAckC chan error + + // Compaction + tcompCmdC chan cCmd + tcompPauseC chan chan<- struct{} + tcompTriggerC chan struct{} + mcompCmdC chan cCmd + mcompTriggerC chan struct{} + compErrC chan error + compErrSetC chan error + compStats [kNumLevels]cStats + + // Close + closeW sync.WaitGroup + closeC chan struct{} + closed uint32 + closer io.Closer +} + +func openDB(s *session) (*DB, error) { + s.log("db@open opening") + start := time.Now() + db := &DB{ + s: s, + // Initial sequence + seq: s.stSeq, + // Write + writeC: make(chan *Batch), + writeMergedC: make(chan bool), + writeLockC: make(chan struct{}, 1), + writeAckC: make(chan error), + journalC: make(chan *Batch), + journalAckC: make(chan error), + // Compaction + tcompCmdC: make(chan cCmd), + tcompPauseC: make(chan chan<- struct{}), + tcompTriggerC: make(chan struct{}, 1), + mcompCmdC: make(chan cCmd), + mcompTriggerC: make(chan struct{}, 1), + compErrC: make(chan error), + compErrSetC: make(chan error), + // Close + closeC: make(chan struct{}), + } + db.initSnapshot() + + if err := db.recoverJournal(); err != nil { + return nil, err + } + + // Remove any obsolete files. + if err := db.checkAndCleanFiles(); err != nil { + // Close journal. + if db.journal != nil { + db.journal.Close() + db.journalWriter.Close() + } + return nil, err + } + + // Don't include compaction error goroutine into wait group. + go db.compactionError() + + db.closeW.Add(3) + go db.tCompaction() + go db.mCompaction() + go db.jWriter() + + s.logf("db@open done T·%v", time.Since(start)) + + runtime.SetFinalizer(db, (*DB).Close) + return db, nil +} + +// Open opens or creates a DB for the given storage. +// The DB will be created if not exist, unless ErrorIfMissing is true. +// Also, if ErrorIfExist is true and the DB exist Open will returns +// os.ErrExist error. +// +// Open will return an error with type of ErrCorrupted if corruption +// detected in the DB. Corrupted DB can be recovered with Recover +// function. +// +// The DB must be closed after use, by calling Close method. +func Open(p storage.Storage, o *opt.Options) (db *DB, err error) { + s, err := newSession(p, o) + if err != nil { + return + } + defer func() { + if err != nil { + s.close() + s.release() + } + }() + + err = s.recover() + if err != nil { + if !os.IsNotExist(err) || s.o.GetErrorIfMissing() { + return + } + err = s.create() + if err != nil { + return + } + } else if s.o.GetErrorIfExist() { + err = os.ErrExist + return + } + + return openDB(s) +} + +// OpenFile opens or creates a DB for the given path. +// The DB will be created if not exist, unless ErrorIfMissing is true. +// Also, if ErrorIfExist is true and the DB exist OpenFile will returns +// os.ErrExist error. +// +// OpenFile uses standard file-system backed storage implementation as +// desribed in the leveldb/storage package. +// +// OpenFile will return an error with type of ErrCorrupted if corruption +// detected in the DB. Corrupted DB can be recovered with Recover +// function. +// +// The DB must be closed after use, by calling Close method. +func OpenFile(path string, o *opt.Options) (db *DB, err error) { + stor, err := storage.OpenFile(path) + if err != nil { + return + } + db, err = Open(stor, o) + if err != nil { + stor.Close() + } else { + db.closer = stor + } + return +} + +// Recover recovers and opens a DB with missing or corrupted manifest files +// for the given storage. It will ignore any manifest files, valid or not. +// The DB must already exist or it will returns an error. +// Also, Recover will ignore ErrorIfMissing and ErrorIfExist options. +// +// The DB must be closed after use, by calling Close method. +func Recover(p storage.Storage, o *opt.Options) (db *DB, err error) { + s, err := newSession(p, o) + if err != nil { + return + } + defer func() { + if err != nil { + s.close() + s.release() + } + }() + + err = recoverTable(s, o) + if err != nil { + return + } + return openDB(s) +} + +// RecoverFile recovers and opens a DB with missing or corrupted manifest files +// for the given path. It will ignore any manifest files, valid or not. +// The DB must already exist or it will returns an error. +// Also, Recover will ignore ErrorIfMissing and ErrorIfExist options. +// +// RecoverFile uses standard file-system backed storage implementation as desribed +// in the leveldb/storage package. +// +// The DB must be closed after use, by calling Close method. +func RecoverFile(path string, o *opt.Options) (db *DB, err error) { + stor, err := storage.OpenFile(path) + if err != nil { + return + } + db, err = Recover(stor, o) + if err != nil { + stor.Close() + } else { + db.closer = stor + } + return +} + +func recoverTable(s *session, o *opt.Options) error { + ff0, err := s.getFiles(storage.TypeTable) + if err != nil { + return err + } + ff1 := files(ff0) + ff1.sort() + + var mSeq uint64 + var good, corrupted int + rec := new(sessionRecord) + buildTable := func(iter iterator.Iterator) (tmp storage.File, size int64, err error) { + tmp = s.newTemp() + writer, err := tmp.Create() + if err != nil { + return + } + defer func() { + writer.Close() + if err != nil { + tmp.Remove() + tmp = nil + } + }() + tw := table.NewWriter(writer, o) + // Copy records. + for iter.Next() { + key := iter.Key() + if validIkey(key) { + err = tw.Append(key, iter.Value()) + if err != nil { + return + } + } + } + err = iter.Error() + if err != nil { + return + } + err = tw.Close() + if err != nil { + return + } + err = writer.Sync() + if err != nil { + return + } + size = int64(tw.BytesLen()) + return + } + recoverTable := func(file storage.File) error { + s.logf("table@recovery recovering @%d", file.Num()) + reader, err := file.Open() + if err != nil { + return err + } + defer reader.Close() + // Get file size. + size, err := reader.Seek(0, 2) + if err != nil { + return err + } + var tSeq uint64 + var tgood, tcorrupted, blockerr int + var min, max []byte + tr := table.NewReader(reader, size, nil, o) + iter := tr.NewIterator(nil, nil) + iter.(iterator.ErrorCallbackSetter).SetErrorCallback(func(err error) { + s.logf("table@recovery found error @%d %q", file.Num(), err) + blockerr++ + }) + // Scan the table. + for iter.Next() { + key := iter.Key() + _, seq, _, ok := parseIkey(key) + if !ok { + tcorrupted++ + continue + } + tgood++ + if seq > tSeq { + tSeq = seq + } + if min == nil { + min = append([]byte{}, key...) + } + max = append(max[:0], key...) + } + if err := iter.Error(); err != nil { + iter.Release() + return err + } + iter.Release() + if tgood > 0 { + if tcorrupted > 0 || blockerr > 0 { + // Rebuild the table. + s.logf("table@recovery rebuilding @%d", file.Num()) + iter := tr.NewIterator(nil, nil) + tmp, newSize, err := buildTable(iter) + iter.Release() + if err != nil { + return err + } + reader.Close() + if err := file.Replace(tmp); err != nil { + return err + } + size = newSize + } + if tSeq > mSeq { + mSeq = tSeq + } + // Add table to level 0. + rec.addTable(0, file.Num(), uint64(size), min, max) + s.logf("table@recovery recovered @%d N·%d C·%d B·%d S·%d Q·%d", file.Num(), tgood, tcorrupted, blockerr, size, tSeq) + } else { + s.logf("table@recovery unrecoverable @%d C·%d B·%d S·%d", file.Num(), tcorrupted, blockerr, size) + } + + good += tgood + corrupted += tcorrupted + + return nil + } + // Recover all tables. + if len(ff1) > 0 { + s.logf("table@recovery F·%d", len(ff1)) + s.markFileNum(ff1[len(ff1)-1].Num()) + for _, file := range ff1 { + if err := recoverTable(file); err != nil { + return err + } + } + s.logf("table@recovery recovered F·%d N·%d C·%d Q·%d", len(ff1), good, corrupted, mSeq) + } + // Set sequence number. + rec.setSeq(mSeq + 1) + // Create new manifest. + if err := s.create(); err != nil { + return err + } + // Commit. + return s.commit(rec) +} + +func (d *DB) recoverJournal() error { + s := d.s + + ff0, err := s.getFiles(storage.TypeJournal) + if err != nil { + return err + } + ff1 := files(ff0) + ff1.sort() + ff2 := make([]storage.File, 0, len(ff1)) + for _, file := range ff1 { + if file.Num() >= s.stJournalNum || file.Num() == s.stPrevJournalNum { + s.markFileNum(file.Num()) + ff2 = append(ff2, file) + } + } + + var jr *journal.Reader + var of storage.File + var mem *memdb.DB + batch := new(Batch) + cm := newCMem(s) + buf := new(util.Buffer) + // Options. + strict := s.o.GetStrict(opt.StrictJournal) + checksum := s.o.GetStrict(opt.StrictJournalChecksum) + writeBuffer := s.o.GetWriteBuffer() + recoverJournal := func(file storage.File) error { + s.logf("journal@recovery recovering @%d", file.Num()) + reader, err := file.Open() + if err != nil { + return err + } + defer reader.Close() + if jr == nil { + jr = journal.NewReader(reader, dropper{s, file}, strict, checksum) + } else { + jr.Reset(reader, dropper{s, file}, strict, checksum) + } + if of != nil { + if mem.Len() > 0 { + if err := cm.flush(mem, 0); err != nil { + return err + } + } + if err := cm.commit(file.Num(), d.seq); err != nil { + return err + } + cm.reset() + of.Remove() + of = nil + } + // Reset memdb. + mem.Reset() + for { + r, err := jr.Next() + if err != nil { + if err == io.EOF { + break + } + return err + } + buf.Reset() + if _, err := buf.ReadFrom(r); err != nil { + if strict { + return err + } + continue + } + if err := batch.decode(buf.Bytes()); err != nil { + return err + } + if err := batch.memReplay(mem); err != nil { + return err + } + d.seq = batch.seq + uint64(batch.len()) + if mem.Size() >= writeBuffer { + // Large enough, flush it. + if err := cm.flush(mem, 0); err != nil { + return err + } + // Reset memdb. + mem.Reset() + } + } + of = file + return nil + } + // Recover all journals. + if len(ff2) > 0 { + s.logf("journal@recovery F·%d", len(ff2)) + mem = memdb.New(s.icmp, writeBuffer) + for _, file := range ff2 { + if err := recoverJournal(file); err != nil { + return err + } + } + // Flush the last journal. + if mem.Len() > 0 { + if err := cm.flush(mem, 0); err != nil { + return err + } + } + } + // Create a new journal. + if _, err := d.newMem(0); err != nil { + return err + } + // Commit. + if err := cm.commit(d.journalFile.Num(), d.seq); err != nil { + // Close journal. + if d.journal != nil { + d.journal.Close() + d.journalWriter.Close() + } + return err + } + // Remove the last journal. + if of != nil { + of.Remove() + } + return nil +} + +func (d *DB) get(key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) { + s := d.s + + ikey := newIKey(key, seq, tSeek) + + em, fm := d.getMems() + for _, m := range [...]*memdb.DB{em, fm} { + if m == nil { + continue + } + mk, mv, me := m.Find(ikey) + if me == nil { + ukey, _, t, ok := parseIkey(mk) + if ok && s.icmp.uCompare(ukey, key) == 0 { + if t == tDel { + return nil, ErrNotFound + } + return mv, nil + } + } else if me != ErrNotFound { + return nil, me + } + } + + v := s.version() + value, cSched, err := v.get(ikey, ro) + v.release() + if cSched { + // Trigger table compaction. + d.compTrigger(d.tcompTriggerC) + } + return +} + +// Get gets the value for the given key. It returns ErrNotFound if the +// DB does not contain the key. +// +// The caller should not modify the contents of the returned slice, but +// it is safe to modify the contents of the argument after Get returns. +func (d *DB) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) { + err = d.ok() + if err != nil { + return + } + + return d.get(key, d.getSeq(), ro) +} + +// NewIterator returns an iterator for the latest snapshot of the +// uderlying DB. +// The returned iterator is not goroutine-safe, but it is safe to use +// multiple iterators concurrently, with each in a dedicated goroutine. +// It is also safe to use an iterator concurrently with modifying its +// underlying DB. The resultant key/value pairs are guaranteed to be +// consistent. +// +// Slice allows slicing the iterator to only contains keys in the given +// range. A nil Range.Start is treated as a key before all keys in the +// DB. And a nil Range.Limit is treated as a key after all keys in +// the DB. +// +// The iterator must be released after use, by calling Release method. +// +// Also read Iterator documentation of the leveldb/iterator package. +func (d *DB) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { + if err := d.ok(); err != nil { + return iterator.NewEmptyIterator(err) + } + + p := d.newSnapshot() + defer p.Release() + return p.NewIterator(slice, ro) +} + +// GetSnapshot returns a latest snapshot of the underlying DB. A snapshot +// is a frozen snapshot of a DB state at a particular point in time. The +// content of snapshot are guaranteed to be consistent. +// +// The snapshot must be released after use, by calling Release method. +func (d *DB) GetSnapshot() (*Snapshot, error) { + if err := d.ok(); err != nil { + return nil, err + } + + return d.newSnapshot(), nil +} + +// GetProperty returns value of the given property name. +// +// Property names: +// leveldb.num-files-at-level{n} +// Returns the number of filer at level 'n'. +// leveldb.stats +// Returns statistics of the underlying DB. +// leveldb.sstables +// Returns sstables list for each level. +func (d *DB) GetProperty(name string) (value string, err error) { + err = d.ok() + if err != nil { + return + } + + const prefix = "leveldb." + if !strings.HasPrefix(name, prefix) { + return "", errors.New("leveldb: GetProperty: unknown property: " + name) + } + + p := name[len(prefix):] + + s := d.s + v := s.version() + defer v.release() + + switch { + case strings.HasPrefix(p, "num-files-at-level"): + var level uint + var rest string + n, _ := fmt.Scanf("%d%s", &level, &rest) + if n != 1 || level >= kNumLevels { + err = errors.New("leveldb: GetProperty: invalid property: " + name) + } else { + value = fmt.Sprint(v.tLen(int(level))) + } + case p == "stats": + value = "Compactions\n" + + " Level | Tables | Size(MB) | Time(sec) | Read(MB) | Write(MB)\n" + + "-------+------------+---------------+---------------+---------------+---------------\n" + for level, tt := range v.tables { + duration, read, write := d.compStats[level].get() + if len(tt) == 0 && duration == 0 { + continue + } + value += fmt.Sprintf(" %3d | %10d | %13.5f | %13.5f | %13.5f | %13.5f\n", + level, len(tt), float64(tt.size())/1048576.0, duration.Seconds(), + float64(read)/1048576.0, float64(write)/1048576.0) + } + case p == "sstables": + for level, tt := range v.tables { + value += fmt.Sprintf("--- level %d ---\n", level) + for _, t := range tt { + value += fmt.Sprintf("%d:%d[%q .. %q]\n", t.file.Num(), t.size, t.min, t.max) + } + } + default: + err = errors.New("leveldb: GetProperty: unknown property: " + name) + } + + return +} + +// SizeOf calculates approximate sizes of the given key ranges. +// The length of the returned sizes are equal with the length of the given +// ranges. The returned sizes measure storage space usage, so if the user +// data compresses by a factor of ten, the returned sizes will be one-tenth +// the size of the corresponding user data size. +// The results may not include the sizes of recently written data. +func (d *DB) SizeOf(ranges []util.Range) (Sizes, error) { + if err := d.ok(); err != nil { + return nil, err + } + + v := d.s.version() + defer v.release() + + sizes := make(Sizes, 0, len(ranges)) + for _, r := range ranges { + min := newIKey(r.Start, kMaxSeq, tSeek) + max := newIKey(r.Limit, kMaxSeq, tSeek) + start, err := v.offsetOf(min) + if err != nil { + return nil, err + } + limit, err := v.offsetOf(max) + if err != nil { + return nil, err + } + var size uint64 + if limit >= start { + size = limit - start + } + sizes = append(sizes, size) + } + + return sizes, nil +} + +// Close closes the DB. This will also releases any outstanding snapshot. +// +// It is not safe to close a DB until all outstanding iterators are released. +// It is valid to call Close multiple times. Other methods should not be +// called after the DB has been closed. +func (d *DB) Close() error { + if !d.setClosed() { + return ErrClosed + } + + s := d.s + start := time.Now() + s.log("db@close closing") + + // Clear the finalizer. + runtime.SetFinalizer(d, nil) + + // Get compaction error. + var err error + select { + case err = <-d.compErrC: + default: + } + + close(d.closeC) + + // Wait for the close WaitGroup. + d.closeW.Wait() + + // Close journal. + if d.journal != nil { + d.journal.Close() + d.journalWriter.Close() + } + + // Close session. + s.close() + s.logf("db@close done T·%v", time.Since(start)) + s.release() + + if d.closer != nil { + if err1 := d.closer.Close(); err == nil { + err = err1 + } + } + + d.s = nil + d.mem = nil + d.frozenMem = nil + d.journal = nil + d.journalWriter = nil + d.journalFile = nil + d.frozenJournalFile = nil + d.snapsRoot = snapshotElement{} + d.closer = nil + + return err +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_compaction.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_compaction.go new file mode 100644 index 000000000..c82bd9f28 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_compaction.go @@ -0,0 +1,688 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "errors" + "sync" + "time" + + "github.com/syndtr/goleveldb/leveldb/memdb" +) + +var ( + errCompactionTransactExiting = errors.New("leveldb: compaction transact exiting") +) + +type cStats struct { + sync.Mutex + duration time.Duration + read uint64 + write uint64 +} + +func (p *cStats) add(n *cStatsStaging) { + p.Lock() + p.duration += n.duration + p.read += n.read + p.write += n.write + p.Unlock() +} + +func (p *cStats) get() (duration time.Duration, read, write uint64) { + p.Lock() + defer p.Unlock() + return p.duration, p.read, p.write +} + +type cStatsStaging struct { + start time.Time + duration time.Duration + on bool + read uint64 + write uint64 +} + +func (p *cStatsStaging) startTimer() { + if !p.on { + p.start = time.Now() + p.on = true + } +} + +func (p *cStatsStaging) stopTimer() { + if p.on { + p.duration += time.Since(p.start) + p.on = false + } +} + +type cMem struct { + s *session + level int + rec *sessionRecord +} + +func newCMem(s *session) *cMem { + return &cMem{s: s, rec: new(sessionRecord)} +} + +func (c *cMem) flush(mem *memdb.DB, level int) error { + s := c.s + + // Write memdb to table + iter := mem.NewIterator(nil) + defer iter.Release() + t, n, err := s.tops.createFrom(iter) + if err != nil { + return err + } + + if level < 0 { + level = s.version_NB().pickLevel(t.min.ukey(), t.max.ukey()) + } + c.rec.addTableFile(level, t) + + s.logf("mem@flush created L%d@%d N·%d S·%s %q:%q", level, t.file.Num(), n, shortenb(int(t.size)), t.min, t.max) + + c.level = level + return nil +} + +func (c *cMem) reset() { + c.rec = new(sessionRecord) +} + +func (c *cMem) commit(journal, seq uint64) error { + c.rec.setJournalNum(journal) + c.rec.setSeq(seq) + // Commit changes + return c.s.commit(c.rec) +} + +func (d *DB) compactionError() { + var err error +noerr: + for { + select { + case _, _ = <-d.closeC: + return + case err = <-d.compErrSetC: + if err != nil { + goto haserr + } + } + } +haserr: + for { + select { + case _, _ = <-d.closeC: + return + case err = <-d.compErrSetC: + if err == nil { + goto noerr + } + case d.compErrC <- err: + } + } +} + +type compactionTransactCounter int + +func (cnt *compactionTransactCounter) incr() { + *cnt++ +} + +func (d *DB) compactionTransact(name string, exec func(cnt *compactionTransactCounter) error, rollback func() error) { + s := d.s + defer func() { + if x := recover(); x != nil { + if x == errCompactionTransactExiting && rollback != nil { + if err := rollback(); err != nil { + s.logf("%s rollback error %q", name, err) + } + } + panic(x) + } + }() + const ( + backoffMin = 1 * time.Second + backoffMax = 8 * time.Second + backoffMul = 2 * time.Second + ) + backoff := backoffMin + backoffT := time.NewTimer(backoff) + lastCnt := compactionTransactCounter(0) + for n := 0; ; n++ { + // Check wether the DB is closed. + if d.isClosed() { + s.logf("%s exiting", name) + d.compactionExitTransact() + } else if n > 0 { + s.logf("%s retrying N·%d", name, n) + } + + // Execute. + cnt := compactionTransactCounter(0) + err := exec(&cnt) + + // Set compaction error status. + select { + case d.compErrSetC <- err: + case _, _ = <-d.closeC: + s.logf("%s exiting", name) + d.compactionExitTransact() + } + if err == nil { + return + } + s.logf("%s error I·%d %q", name, cnt, err) + + // Reset backoff duration if counter is advancing. + if cnt > lastCnt { + backoff = backoffMin + lastCnt = cnt + } + + // Backoff. + backoffT.Reset(backoff) + if backoff < backoffMax { + backoff *= backoffMul + if backoff > backoffMax { + backoff = backoffMax + } + } + select { + case <-backoffT.C: + case _, _ = <-d.closeC: + s.logf("%s exiting", name) + d.compactionExitTransact() + } + } +} + +func (d *DB) compactionExitTransact() { + panic(errCompactionTransactExiting) +} + +func (d *DB) memCompaction() { + mem := d.getFrozenMem() + if mem == nil { + return + } + + s := d.s + c := newCMem(s) + stats := new(cStatsStaging) + + s.logf("mem@flush N·%d S·%s", mem.Len(), shortenb(mem.Size())) + + // Don't compact empty memdb. + if mem.Len() == 0 { + s.logf("mem@flush skipping") + // drop frozen mem + d.dropFrozenMem() + return + } + + // Pause table compaction. + ch := make(chan struct{}) + select { + case d.tcompPauseC <- (chan<- struct{})(ch): + case _, _ = <-d.closeC: + return + } + + d.compactionTransact("mem@flush", func(cnt *compactionTransactCounter) (err error) { + stats.startTimer() + defer stats.stopTimer() + return c.flush(mem, -1) + }, func() error { + for _, r := range c.rec.addedTables { + s.logf("mem@flush rollback @%d", r.num) + f := s.getTableFile(r.num) + if err := f.Remove(); err != nil { + return err + } + } + return nil + }) + + d.compactionTransact("mem@commit", func(cnt *compactionTransactCounter) (err error) { + stats.startTimer() + defer stats.stopTimer() + return c.commit(d.journalFile.Num(), d.frozenSeq) + }, nil) + + s.logf("mem@flush commited F·%d T·%v", len(c.rec.addedTables), stats.duration) + + for _, r := range c.rec.addedTables { + stats.write += r.size + } + d.compStats[c.level].add(stats) + + // Drop frozen mem. + d.dropFrozenMem() + + // Resume table compaction. + select { + case <-ch: + case _, _ = <-d.closeC: + return + } + + // Trigger table compaction. + d.compTrigger(d.mcompTriggerC) +} + +func (d *DB) tableCompaction(c *compaction, noTrivial bool) { + s := d.s + + rec := new(sessionRecord) + rec.addCompactionPointer(c.level, c.max) + + if !noTrivial && c.trivial() { + t := c.tables[0][0] + s.logf("table@move L%d@%d -> L%d", c.level, t.file.Num(), c.level+1) + rec.deleteTable(c.level, t.file.Num()) + rec.addTableFile(c.level+1, t) + d.compactionTransact("table@move", func(cnt *compactionTransactCounter) (err error) { + return s.commit(rec) + }, nil) + return + } + + var stats [2]cStatsStaging + for i, tt := range c.tables { + for _, t := range tt { + stats[i].read += t.size + // Insert deleted tables into record + rec.deleteTable(c.level+i, t.file.Num()) + } + } + sourceSize := int(stats[0].read + stats[1].read) + minSeq := d.minSeq() + s.logf("table@compaction L%d·%d -> L%d·%d S·%s Q·%d", c.level, len(c.tables[0]), c.level+1, len(c.tables[1]), shortenb(sourceSize), minSeq) + + var snapUkey []byte + var snapHasUkey bool + var snapSeq uint64 + var snapIter int + var snapDropCnt int + var dropCnt int + d.compactionTransact("table@build", func(cnt *compactionTransactCounter) (err error) { + ukey := append([]byte{}, snapUkey...) + hasUkey := snapHasUkey + lseq := snapSeq + dropCnt = snapDropCnt + snapSched := snapIter == 0 + + var tw *tWriter + finish := func() error { + t, err := tw.finish() + if err != nil { + return err + } + rec.addTableFile(c.level+1, t) + stats[1].write += t.size + s.logf("table@build created L%d@%d N·%d S·%s %q:%q", c.level+1, t.file.Num(), tw.tw.EntriesLen(), shortenb(int(t.size)), t.min, t.max) + return nil + } + + defer func() { + stats[1].stopTimer() + if tw != nil { + tw.drop() + tw = nil + } + }() + + stats[1].startTimer() + iter := c.newIterator() + defer iter.Release() + for i := 0; iter.Next(); i++ { + // Incr transact counter. + cnt.incr() + + // Skip until last state. + if i < snapIter { + continue + } + + key := iKey(iter.Key()) + + if c.shouldStopBefore(key) && tw != nil { + err = finish() + if err != nil { + return + } + snapSched = true + tw = nil + } + + // Scheduled for snapshot, snapshot will used to retry compaction + // if error occured. + if snapSched { + snapUkey = append(snapUkey[:0], ukey...) + snapHasUkey = hasUkey + snapSeq = lseq + snapIter = i + snapDropCnt = dropCnt + snapSched = false + } + + if seq, t, ok := key.parseNum(); !ok { + // Don't drop error keys + ukey = ukey[:0] + hasUkey = false + lseq = kMaxSeq + } else { + if !hasUkey || s.icmp.uCompare(key.ukey(), ukey) != 0 { + // First occurrence of this user key + ukey = append(ukey[:0], key.ukey()...) + hasUkey = true + lseq = kMaxSeq + } + + drop := false + if lseq <= minSeq { + // Dropped because newer entry for same user key exist + drop = true // (A) + } else if t == tDel && seq <= minSeq && c.isBaseLevelForKey(ukey) { + // For this user key: + // (1) there is no data in higher levels + // (2) data in lower levels will have larger seq numbers + // (3) data in layers that are being compacted here and have + // smaller seq numbers will be dropped in the next + // few iterations of this loop (by rule (A) above). + // Therefore this deletion marker is obsolete and can be dropped. + drop = true + } + + lseq = seq + if drop { + dropCnt++ + continue + } + } + + // Create new table if not already + if tw == nil { + // Check for pause event. + select { + case ch := <-d.tcompPauseC: + d.pauseCompaction(ch) + case _, _ = <-d.closeC: + d.compactionExitTransact() + default: + } + + // Create new table. + tw, err = s.tops.create() + if err != nil { + return + } + } + + // Write key/value into table + err = tw.add(key, iter.Value()) + if err != nil { + return + } + + // Finish table if it is big enough + if tw.tw.BytesLen() >= kMaxTableSize { + err = finish() + if err != nil { + return + } + snapSched = true + tw = nil + } + } + + err = iter.Error() + if err != nil { + return + } + + // Finish last table + if tw != nil && !tw.empty() { + err = finish() + if err != nil { + return + } + tw = nil + } + return + }, func() error { + for _, r := range rec.addedTables { + s.logf("table@build rollback @%d", r.num) + f := s.getTableFile(r.num) + if err := f.Remove(); err != nil { + return err + } + } + return nil + }) + + // Commit changes + d.compactionTransact("table@commit", func(cnt *compactionTransactCounter) (err error) { + stats[1].startTimer() + defer stats[1].stopTimer() + return s.commit(rec) + }, nil) + + resultSize := int(int(stats[1].write)) + s.logf("table@compaction commited F%s S%s D·%d T·%v", sint(len(rec.addedTables)-len(rec.deletedTables)), sshortenb(resultSize-sourceSize), dropCnt, stats[1].duration) + + // Save compaction stats + for i := range stats { + d.compStats[c.level+1].add(&stats[i]) + } +} + +func (d *DB) tableRangeCompaction(level int, min, max []byte) { + s := d.s + s.logf("table@compaction range L%d %q:%q", level, min, max) + + if level >= 0 { + if c := s.getCompactionRange(level, min, max); c != nil { + d.tableCompaction(c, true) + } + } else { + v := s.version_NB() + m := 1 + for i, t := range v.tables[1:] { + if t.isOverlaps(min, max, true, s.icmp) { + m = i + 1 + } + } + for level := 0; level < m; level++ { + if c := s.getCompactionRange(level, min, max); c != nil { + d.tableCompaction(c, true) + } + } + } +} + +func (d *DB) tableAutoCompaction() { + if c := d.s.pickCompaction(); c != nil { + d.tableCompaction(c, false) + } +} + +func (d *DB) tableNeedCompaction() bool { + return d.s.version_NB().needCompaction() +} + +func (d *DB) pauseCompaction(ch chan<- struct{}) { + select { + case ch <- struct{}{}: + case _, _ = <-d.closeC: + d.compactionExitTransact() + } +} + +type cCmd interface { + ack(err error) +} + +type cIdle struct { + ackC chan<- error +} + +func (r cIdle) ack(err error) { + r.ackC <- err +} + +type cRange struct { + level int + min, max []byte + ackC chan<- error +} + +func (r cRange) ack(err error) { + defer func() { + recover() + }() + if r.ackC != nil { + r.ackC <- err + } +} + +func (d *DB) compSendIdle(compC chan<- cCmd) error { + ch := make(chan error) + defer close(ch) + // Send cmd. + select { + case compC <- cIdle{ch}: + case err := <-d.compErrC: + return err + case _, _ = <-d.closeC: + return ErrClosed + } + // Wait cmd. + return <-ch +} + +func (d *DB) compSendRange(compC chan<- cCmd, level int, min, max []byte) (err error) { + ch := make(chan error) + defer close(ch) + // Send cmd. + select { + case compC <- cRange{level, min, max, ch}: + case err := <-d.compErrC: + return err + case _, _ = <-d.closeC: + return ErrClosed + } + // Wait cmd. + select { + case err = <-d.compErrC: + case err = <-ch: + } + return err +} + +func (d *DB) compTrigger(compTriggerC chan struct{}) { + select { + case compTriggerC <- struct{}{}: + default: + } +} + +func (d *DB) mCompaction() { + var x cCmd + + defer func() { + if x := recover(); x != nil { + if x != errCompactionTransactExiting { + panic(x) + } + } + if x != nil { + x.ack(ErrClosed) + } + d.closeW.Done() + }() + + for { + select { + case _, _ = <-d.closeC: + return + case x = <-d.mcompCmdC: + d.memCompaction() + x.ack(nil) + x = nil + case <-d.mcompTriggerC: + d.memCompaction() + } + } +} + +func (d *DB) tCompaction() { + var x cCmd + var ackQ []cCmd + + defer func() { + if x := recover(); x != nil { + if x != errCompactionTransactExiting { + panic(x) + } + } + for i := range ackQ { + ackQ[i].ack(ErrClosed) + ackQ[i] = nil + } + if x != nil { + x.ack(ErrClosed) + } + d.closeW.Done() + }() + + for { + if d.tableNeedCompaction() { + select { + case x = <-d.tcompCmdC: + case <-d.tcompTriggerC: + case _, _ = <-d.closeC: + return + case ch := <-d.tcompPauseC: + d.pauseCompaction(ch) + continue + default: + } + } else { + for i := range ackQ { + ackQ[i].ack(nil) + ackQ[i] = nil + } + ackQ = ackQ[:0] + select { + case x = <-d.tcompCmdC: + case <-d.tcompTriggerC: + case ch := <-d.tcompPauseC: + d.pauseCompaction(ch) + continue + case _, _ = <-d.closeC: + return + } + } + if x != nil { + switch cmd := x.(type) { + case cIdle: + ackQ = append(ackQ, x) + case cRange: + d.tableRangeCompaction(cmd.level, cmd.min, cmd.max) + x.ack(nil) + } + x = nil + } + d.tableAutoCompaction() + } +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_iter.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_iter.go new file mode 100644 index 000000000..9973a8fef --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_iter.go @@ -0,0 +1,310 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "errors" + "runtime" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/util" +) + +var ( + errInvalidIkey = errors.New("leveldb: Iterator: invalid internal key") +) + +func (db *DB) newRawIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { + s := db.s + + em, fm := db.getMems() + v := s.version() + + ti := v.getIterators(slice, ro) + n := len(ti) + 2 + i := make([]iterator.Iterator, 0, n) + i = append(i, em.NewIterator(slice)) + if fm != nil { + i = append(i, fm.NewIterator(slice)) + } + i = append(i, ti...) + strict := s.o.GetStrict(opt.StrictIterator) || ro.GetStrict(opt.StrictIterator) + mi := iterator.NewMergedIterator(i, s.icmp, strict) + mi.SetReleaser(&versionReleaser{v: v}) + return mi +} + +func (db *DB) newIterator(seq uint64, slice *util.Range, ro *opt.ReadOptions) *dbIter { + var slice_ *util.Range + if slice != nil { + slice_ = &util.Range{} + if slice.Start != nil { + slice_.Start = newIKey(slice.Start, kMaxSeq, tSeek) + } + if slice.Limit != nil { + slice_.Limit = newIKey(slice.Limit, kMaxSeq, tSeek) + } + } + rawIter := db.newRawIterator(slice_, ro) + iter := &dbIter{ + icmp: db.s.icmp, + iter: rawIter, + seq: seq, + strict: db.s.o.GetStrict(opt.StrictIterator) || ro.GetStrict(opt.StrictIterator), + key: make([]byte, 0), + value: make([]byte, 0), + } + runtime.SetFinalizer(iter, (*dbIter).Release) + return iter +} + +type dir int + +const ( + dirReleased dir = iota - 1 + dirSOI + dirEOI + dirBackward + dirForward +) + +// dbIter represent an interator states over a database session. +type dbIter struct { + icmp *iComparer + iter iterator.Iterator + seq uint64 + strict bool + + dir dir + key []byte + value []byte + err error + releaser util.Releaser +} + +func (i *dbIter) setErr(err error) { + i.err = err + i.key = nil + i.value = nil +} + +func (i *dbIter) iterErr() { + if err := i.iter.Error(); err != nil { + i.setErr(err) + } +} + +func (i *dbIter) Valid() bool { + return i.err == nil && i.dir > dirEOI +} + +func (i *dbIter) First() bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + if i.iter.First() { + i.dir = dirSOI + return i.next() + } + i.dir = dirEOI + i.iterErr() + return false +} + +func (i *dbIter) Last() bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + if i.iter.Last() { + return i.prev() + } + i.dir = dirSOI + i.iterErr() + return false +} + +func (i *dbIter) Seek(key []byte) bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + ikey := newIKey(key, i.seq, tSeek) + if i.iter.Seek(ikey) { + i.dir = dirSOI + return i.next() + } + i.dir = dirEOI + i.iterErr() + return false +} + +func (i *dbIter) next() bool { + for { + ukey, seq, t, ok := parseIkey(i.iter.Key()) + if ok { + if seq <= i.seq { + switch t { + case tDel: + // Skip deleted key. + i.key = append(i.key[:0], ukey...) + i.dir = dirForward + case tVal: + if i.dir == dirSOI || i.icmp.uCompare(ukey, i.key) > 0 { + i.key = append(i.key[:0], ukey...) + i.value = append(i.value[:0], i.iter.Value()...) + i.dir = dirForward + return true + } + } + } + } else if i.strict { + i.setErr(errInvalidIkey) + break + } + if !i.iter.Next() { + i.dir = dirEOI + i.iterErr() + break + } + } + return false +} + +func (i *dbIter) Next() bool { + if i.dir == dirEOI || i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + if !i.iter.Next() || (i.dir == dirBackward && !i.iter.Next()) { + i.dir = dirEOI + i.iterErr() + return false + } + return i.next() +} + +func (i *dbIter) prev() bool { + i.dir = dirBackward + del := true + if i.iter.Valid() { + for { + ukey, seq, t, ok := parseIkey(i.iter.Key()) + if ok { + if seq <= i.seq { + if !del && i.icmp.uCompare(ukey, i.key) < 0 { + return true + } + del = (t == tDel) + if !del { + i.key = append(i.key[:0], ukey...) + i.value = append(i.value[:0], i.iter.Value()...) + } + } + } else if i.strict { + i.setErr(errInvalidIkey) + return false + } + if !i.iter.Prev() { + break + } + } + } + if del { + i.dir = dirSOI + i.iterErr() + return false + } + return true +} + +func (i *dbIter) Prev() bool { + if i.dir == dirSOI || i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + switch i.dir { + case dirEOI: + return i.Last() + case dirForward: + for i.iter.Prev() { + ukey, _, _, ok := parseIkey(i.iter.Key()) + if ok { + if i.icmp.uCompare(ukey, i.key) < 0 { + goto cont + } + } else if i.strict { + i.setErr(errInvalidIkey) + return false + } + } + i.dir = dirSOI + i.iterErr() + return false + } + +cont: + return i.prev() +} + +func (i *dbIter) Key() []byte { + if i.err != nil || i.dir <= dirEOI { + return nil + } + return i.key +} + +func (i *dbIter) Value() []byte { + if i.err != nil || i.dir <= dirEOI { + return nil + } + return i.value +} + +func (i *dbIter) Release() { + if i.dir != dirReleased { + // Clear the finalizer. + runtime.SetFinalizer(i, nil) + + if i.releaser != nil { + i.releaser.Release() + } + + i.dir = dirReleased + i.key = nil + i.value = nil + i.iter.Release() + i.iter = nil + } +} + +func (i *dbIter) SetReleaser(releaser util.Releaser) { + if i.dir != dirReleased { + i.releaser = releaser + } +} + +func (i *dbIter) Error() error { + return i.err +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_snapshot.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_snapshot.go new file mode 100644 index 000000000..225b7cd5e --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_snapshot.go @@ -0,0 +1,165 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "runtime" + "sync" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/util" +) + +type snapshotElement struct { + seq uint64 + ref int + // Next and previous pointers in the doubly-linked list of elements. + next, prev *snapshotElement +} + +// Initialize the snapshot. +func (db *DB) initSnapshot() { + db.snapsRoot.next = &db.snapsRoot + db.snapsRoot.prev = &db.snapsRoot +} + +// Acquires a snapshot, based on latest sequence. +func (db *DB) acquireSnapshot() *snapshotElement { + db.snapsMu.Lock() + seq := db.getSeq() + elem := db.snapsRoot.prev + if elem == &db.snapsRoot || elem.seq != seq { + at := db.snapsRoot.prev + next := at.next + elem = &snapshotElement{ + seq: seq, + prev: at, + next: next, + } + at.next = elem + next.prev = elem + } + elem.ref++ + db.snapsMu.Unlock() + return elem +} + +// Releases given snapshot element. +func (db *DB) releaseSnapshot(elem *snapshotElement) { + if !db.isClosed() { + db.snapsMu.Lock() + elem.ref-- + if elem.ref == 0 { + elem.prev.next = elem.next + elem.next.prev = elem.prev + elem.next = nil + elem.prev = nil + } else if elem.ref < 0 { + panic("leveldb: Snapshot: negative element reference") + } + db.snapsMu.Unlock() + } +} + +// Gets minimum sequence that not being snapshoted. +func (db *DB) minSeq() uint64 { + db.snapsMu.Lock() + defer db.snapsMu.Unlock() + elem := db.snapsRoot.prev + if elem != &db.snapsRoot { + return elem.seq + } + return db.getSeq() +} + +// Snapshot is a DB snapshot. +type Snapshot struct { + db *DB + elem *snapshotElement + mu sync.Mutex + released bool +} + +// Creates new snapshot object. +func (db *DB) newSnapshot() *Snapshot { + p := &Snapshot{ + db: db, + elem: db.acquireSnapshot(), + } + runtime.SetFinalizer(p, (*Snapshot).Release) + return p +} + +// Get gets the value for the given key. It returns ErrNotFound if +// the DB does not contain the key. +// +// The caller should not modify the contents of the returned slice, but +// it is safe to modify the contents of the argument after Get returns. +func (p *Snapshot) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) { + db := p.db + err = db.ok() + if err != nil { + return + } + p.mu.Lock() + defer p.mu.Unlock() + if p.released { + err = ErrSnapshotReleased + return + } + return db.get(key, p.elem.seq, ro) +} + +// NewIterator returns an iterator for the snapshot of the uderlying DB. +// The returned iterator is not goroutine-safe, but it is safe to use +// multiple iterators concurrently, with each in a dedicated goroutine. +// It is also safe to use an iterator concurrently with modifying its +// underlying DB. The resultant key/value pairs are guaranteed to be +// consistent. +// +// Slice allows slicing the iterator to only contains keys in the given +// range. A nil Range.Start is treated as a key before all keys in the +// DB. And a nil Range.Limit is treated as a key after all keys in +// the DB. +// +// The iterator must be released after use, by calling Release method. +// Releasing the snapshot doesn't mean releasing the iterator too, the +// iterator would be still valid until released. +// +// Also read Iterator documentation of the leveldb/iterator package. +func (p *Snapshot) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { + db := p.db + if err := db.ok(); err != nil { + return iterator.NewEmptyIterator(err) + } + p.mu.Lock() + defer p.mu.Unlock() + if p.released { + return iterator.NewEmptyIterator(ErrSnapshotReleased) + } + return db.newIterator(p.elem.seq, slice, ro) +} + +// Release releases the snapshot. This will not release any returned +// iterators, the iterators would still be valid until released or the +// underlying DB is closed. +// +// Other methods should not be called after the snapshot has been released. +func (p *Snapshot) Release() { + p.mu.Lock() + if !p.released { + // Clear the finalizer. + runtime.SetFinalizer(p, nil) + + p.released = true + p.db.releaseSnapshot(p.elem) + p.db = nil + p.elem = nil + } + p.mu.Unlock() +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_state.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_state.go new file mode 100644 index 000000000..a13706142 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_state.go @@ -0,0 +1,114 @@ +// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "sync/atomic" + + "github.com/syndtr/goleveldb/leveldb/journal" + "github.com/syndtr/goleveldb/leveldb/memdb" +) + +// Get latest sequence number. +func (d *DB) getSeq() uint64 { + return atomic.LoadUint64(&d.seq) +} + +// Atomically adds delta to seq. +func (d *DB) addSeq(delta uint64) { + atomic.AddUint64(&d.seq, delta) +} + +// Create new memdb and froze the old one; need external synchronization. +// newMem only called synchronously by the writer. +func (d *DB) newMem(n int) (mem *memdb.DB, err error) { + s := d.s + + num := s.allocFileNum() + file := s.getJournalFile(num) + w, err := file.Create() + if err != nil { + s.reuseFileNum(num) + return + } + d.memMu.Lock() + if d.journal == nil { + d.journal = journal.NewWriter(w) + } else { + d.journal.Reset(w) + d.journalWriter.Close() + d.frozenJournalFile = d.journalFile + } + d.journalWriter = w + d.journalFile = file + d.frozenMem = d.mem + d.mem = memdb.New(s.icmp, maxInt(d.s.o.GetWriteBuffer(), n)) + mem = d.mem + // The seq only incremented by the writer. + d.frozenSeq = d.seq + d.memMu.Unlock() + return +} + +// Get all memdbs. +func (d *DB) getMems() (e *memdb.DB, f *memdb.DB) { + d.memMu.RLock() + defer d.memMu.RUnlock() + return d.mem, d.frozenMem +} + +// Get frozen memdb. +func (d *DB) getEffectiveMem() *memdb.DB { + d.memMu.RLock() + defer d.memMu.RUnlock() + return d.mem +} + +// Check whether we has frozen memdb. +func (d *DB) hasFrozenMem() bool { + d.memMu.RLock() + defer d.memMu.RUnlock() + return d.frozenMem != nil +} + +// Get frozen memdb. +func (d *DB) getFrozenMem() *memdb.DB { + d.memMu.RLock() + defer d.memMu.RUnlock() + return d.frozenMem +} + +// Drop frozen memdb; assume that frozen memdb isn't nil. +func (d *DB) dropFrozenMem() { + d.memMu.Lock() + if err := d.frozenJournalFile.Remove(); err != nil { + d.s.logf("journal@remove removing @%d %q", d.frozenJournalFile.Num(), err) + } else { + d.s.logf("journal@remove removed @%d", d.frozenJournalFile.Num()) + } + d.frozenJournalFile = nil + d.frozenMem = nil + d.memMu.Unlock() +} + +// Set closed flag; return true if not already closed. +func (d *DB) setClosed() bool { + return atomic.CompareAndSwapUint32(&d.closed, 0, 1) +} + +// Check whether DB was closed. +func (d *DB) isClosed() bool { + return atomic.LoadUint32(&d.closed) != 0 +} + +// Check read ok status. +func (d *DB) ok() error { + if d.isClosed() { + return ErrClosed + } + return nil +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_test.go new file mode 100644 index 000000000..5de7d9723 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_test.go @@ -0,0 +1,1888 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "fmt" + "math/rand" + "os" + "path/filepath" + "runtime" + "strings" + "sync" + "sync/atomic" + "testing" + "time" + "unsafe" + + "github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/syndtr/goleveldb/leveldb/filter" + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" + "github.com/syndtr/goleveldb/leveldb/util" +) + +func tkey(i int) []byte { + return []byte(fmt.Sprintf("%016d", i)) +} + +func tval(seed, n int) []byte { + r := rand.New(rand.NewSource(int64(seed))) + return randomString(r, n) +} + +type dbHarness struct { + t *testing.T + + stor *testStorage + db *DB + o *opt.Options + ro *opt.ReadOptions + wo *opt.WriteOptions +} + +func newDbHarnessWopt(t *testing.T, o *opt.Options) *dbHarness { + h := new(dbHarness) + h.init(t, o) + return h +} + +func newDbHarness(t *testing.T) *dbHarness { + return newDbHarnessWopt(t, &opt.Options{}) +} + +func (h *dbHarness) init(t *testing.T, o *opt.Options) { + h.t = t + h.stor = newTestStorage(t) + h.o = o + h.ro = nil + h.wo = nil + + if err := h.openDB0(); err != nil { + // So that it will come after fatal message. + defer h.stor.Close() + h.t.Fatal("Open (init): got error: ", err) + } +} + +func (h *dbHarness) openDB0() (err error) { + h.t.Log("opening DB") + h.db, err = Open(h.stor, h.o) + return +} + +func (h *dbHarness) openDB() { + if err := h.openDB0(); err != nil { + h.t.Fatal("Open: got error: ", err) + } +} + +func (h *dbHarness) closeDB0() error { + h.t.Log("closing DB") + return h.db.Close() +} + +func (h *dbHarness) closeDB() { + if err := h.closeDB0(); err != nil { + h.t.Error("Close: got error: ", err) + } + h.stor.CloseCheck() + runtime.GC() +} + +func (h *dbHarness) reopenDB() { + h.closeDB() + h.openDB() +} + +func (h *dbHarness) close() { + h.closeDB0() + h.db = nil + h.stor.Close() + h.stor = nil + runtime.GC() +} + +func (h *dbHarness) openAssert(want bool) { + db, err := Open(h.stor, h.o) + if err != nil { + if want { + h.t.Error("Open: assert: got error: ", err) + } else { + h.t.Log("Open: assert: got error (expected): ", err) + } + } else { + if !want { + h.t.Error("Open: assert: expect error") + } + db.Close() + } +} + +func (h *dbHarness) write(batch *Batch) { + if err := h.db.Write(batch, h.wo); err != nil { + h.t.Error("Write: got error: ", err) + } +} + +func (h *dbHarness) put(key, value string) { + if err := h.db.Put([]byte(key), []byte(value), h.wo); err != nil { + h.t.Error("Put: got error: ", err) + } +} + +func (h *dbHarness) putMulti(n int, low, hi string) { + for i := 0; i < n; i++ { + h.put(low, "begin") + h.put(hi, "end") + h.compactMem() + } +} + +func (h *dbHarness) maxNextLevelOverlappingBytes(want uint64) { + t := h.t + db := h.db + + var res uint64 + v := db.s.version() + for i, tt := range v.tables[1 : len(v.tables)-1] { + level := i + 1 + next := v.tables[level+1] + for _, t := range tt { + var r tFiles + min, max := t.min.ukey(), t.max.ukey() + next.getOverlaps(min, max, &r, true, db.s.icmp.ucmp) + sum := r.size() + if sum > res { + res = sum + } + } + } + v.release() + + if res > want { + t.Errorf("next level overlapping bytes is more than %d, got=%d", want, res) + } +} + +func (h *dbHarness) delete(key string) { + t := h.t + db := h.db + + err := db.Delete([]byte(key), h.wo) + if err != nil { + t.Error("Delete: got error: ", err) + } +} + +func (h *dbHarness) assertNumKeys(want int) { + iter := h.db.NewIterator(nil, h.ro) + defer iter.Release() + got := 0 + for iter.Next() { + got++ + } + if err := iter.Error(); err != nil { + h.t.Error("assertNumKeys: ", err) + } + if want != got { + h.t.Errorf("assertNumKeys: want=%d got=%d", want, got) + } +} + +func (h *dbHarness) getr(db Reader, key string, expectFound bool) (found bool, v []byte) { + t := h.t + v, err := db.Get([]byte(key), h.ro) + switch err { + case ErrNotFound: + if expectFound { + t.Errorf("Get: key '%s' not found, want found", key) + } + case nil: + found = true + if !expectFound { + t.Errorf("Get: key '%s' found, want not found", key) + } + default: + t.Error("Get: got error: ", err) + } + return +} + +func (h *dbHarness) get(key string, expectFound bool) (found bool, v []byte) { + return h.getr(h.db, key, expectFound) +} + +func (h *dbHarness) getValr(db Reader, key, value string) { + t := h.t + found, r := h.getr(db, key, true) + if !found { + return + } + rval := string(r) + if rval != value { + t.Errorf("Get: invalid value, got '%s', want '%s'", rval, value) + } +} + +func (h *dbHarness) getVal(key, value string) { + h.getValr(h.db, key, value) +} + +func (h *dbHarness) allEntriesFor(key, want string) { + t := h.t + db := h.db + s := db.s + + ikey := newIKey([]byte(key), kMaxSeq, tVal) + iter := db.newRawIterator(nil, nil) + if !iter.Seek(ikey) && iter.Error() != nil { + t.Error("AllEntries: error during seek, err: ", iter.Error()) + return + } + res := "[ " + first := true + for iter.Valid() { + rkey := iKey(iter.Key()) + if _, t, ok := rkey.parseNum(); ok { + if s.icmp.uCompare(ikey.ukey(), rkey.ukey()) != 0 { + break + } + if !first { + res += ", " + } + first = false + switch t { + case tVal: + res += string(iter.Value()) + case tDel: + res += "DEL" + } + } else { + if !first { + res += ", " + } + first = false + res += "CORRUPTED" + } + iter.Next() + } + if !first { + res += " " + } + res += "]" + if res != want { + t.Errorf("AllEntries: assert failed for key %q, got=%q want=%q", key, res, want) + } +} + +// Return a string that contains all key,value pairs in order, +// formatted like "(k1->v1)(k2->v2)". +func (h *dbHarness) getKeyVal(want string) { + t := h.t + db := h.db + + s, err := db.GetSnapshot() + if err != nil { + t.Fatal("GetSnapshot: got error: ", err) + } + res := "" + iter := s.NewIterator(nil, nil) + for iter.Next() { + res += fmt.Sprintf("(%s->%s)", string(iter.Key()), string(iter.Value())) + } + iter.Release() + + if res != want { + t.Errorf("GetKeyVal: invalid key/value pair, got=%q want=%q", res, want) + } + s.Release() +} + +func (h *dbHarness) waitCompaction() { + t := h.t + db := h.db + if err := db.compSendIdle(db.tcompCmdC); err != nil { + t.Error("compaction error: ", err) + } +} + +func (h *dbHarness) waitMemCompaction() { + t := h.t + db := h.db + + if err := db.compSendIdle(db.mcompCmdC); err != nil { + t.Error("compaction error: ", err) + } +} + +func (h *dbHarness) compactMem() { + t := h.t + db := h.db + + db.writeLockC <- struct{}{} + defer func() { + <-db.writeLockC + }() + + if _, err := db.rotateMem(0); err != nil { + t.Error("compaction error: ", err) + } + if err := db.compSendIdle(db.mcompCmdC); err != nil { + t.Error("compaction error: ", err) + } + + if h.totalTables() == 0 { + t.Error("zero tables after mem compaction") + } +} + +func (h *dbHarness) compactRangeAtErr(level int, min, max string, wanterr bool) { + t := h.t + db := h.db + + var _min, _max []byte + if min != "" { + _min = []byte(min) + } + if max != "" { + _max = []byte(max) + } + + if err := db.compSendRange(db.tcompCmdC, level, _min, _max); err != nil { + if wanterr { + t.Log("CompactRangeAt: got error (expected): ", err) + } else { + t.Error("CompactRangeAt: got error: ", err) + } + } else if wanterr { + t.Error("CompactRangeAt: expect error") + } +} + +func (h *dbHarness) compactRangeAt(level int, min, max string) { + h.compactRangeAtErr(level, min, max, false) +} + +func (h *dbHarness) compactRange(min, max string) { + t := h.t + db := h.db + + var r util.Range + if min != "" { + r.Start = []byte(min) + } + if max != "" { + r.Limit = []byte(max) + } + if err := db.CompactRange(r); err != nil { + t.Error("CompactRange: got error: ", err) + } +} + +func (h *dbHarness) sizeAssert(start, limit string, low, hi uint64) { + t := h.t + db := h.db + + s, err := db.SizeOf([]util.Range{ + {[]byte(start), []byte(limit)}, + }) + if err != nil { + t.Error("SizeOf: got error: ", err) + } + if s.Sum() < low || s.Sum() > hi { + t.Errorf("sizeof %q to %q not in range, want %d - %d, got %d", + shorten(start), shorten(limit), low, hi, s.Sum()) + } +} + +func (h *dbHarness) getSnapshot() (s *Snapshot) { + s, err := h.db.GetSnapshot() + if err != nil { + h.t.Fatal("GetSnapshot: got error: ", err) + } + return +} +func (h *dbHarness) tablesPerLevel(want string) { + res := "" + nz := 0 + v := h.db.s.version() + for level, tt := range v.tables { + if level > 0 { + res += "," + } + res += fmt.Sprint(len(tt)) + if len(tt) > 0 { + nz = len(res) + } + } + v.release() + res = res[:nz] + if res != want { + h.t.Errorf("invalid tables len, want=%s, got=%s", want, res) + } +} + +func (h *dbHarness) totalTables() (n int) { + v := h.db.s.version() + for _, tt := range v.tables { + n += len(tt) + } + v.release() + return +} + +type keyValue interface { + Key() []byte + Value() []byte +} + +func testKeyVal(t *testing.T, kv keyValue, want string) { + res := string(kv.Key()) + "->" + string(kv.Value()) + if res != want { + t.Errorf("invalid key/value, want=%q, got=%q", want, res) + } +} + +func numKey(num int) string { + return fmt.Sprintf("key%06d", num) +} + +var _bloom_filter = filter.NewBloomFilter(10) + +func truno(t *testing.T, o *opt.Options, f func(h *dbHarness)) { + for i := 0; i < 4; i++ { + func() { + switch i { + case 0: + case 1: + if o == nil { + o = &opt.Options{Filter: _bloom_filter} + } else { + old := o + o = &opt.Options{} + *o = *old + o.Filter = _bloom_filter + } + case 2: + if o == nil { + o = &opt.Options{Compression: opt.NoCompression} + } else { + old := o + o = &opt.Options{} + *o = *old + o.Compression = opt.NoCompression + } + } + h := newDbHarnessWopt(t, o) + defer h.close() + switch i { + case 3: + h.reopenDB() + } + f(h) + }() + } +} + +func trun(t *testing.T, f func(h *dbHarness)) { + truno(t, nil, f) +} + +func testAligned(t *testing.T, name string, offset uintptr) { + if offset%8 != 0 { + t.Errorf("field %s offset is not 64-bit aligned", name) + } +} + +func Test_FieldsAligned(t *testing.T) { + p1 := new(DB) + testAligned(t, "DB.seq", unsafe.Offsetof(p1.seq)) + p2 := new(session) + testAligned(t, "session.stFileNum", unsafe.Offsetof(p2.stFileNum)) + testAligned(t, "session.stJournalNum", unsafe.Offsetof(p2.stJournalNum)) + testAligned(t, "session.stPrevJournalNum", unsafe.Offsetof(p2.stPrevJournalNum)) + testAligned(t, "session.stSeq", unsafe.Offsetof(p2.stSeq)) +} + +func TestDb_Locking(t *testing.T) { + h := newDbHarness(t) + defer h.stor.Close() + h.openAssert(false) + h.closeDB() + h.openAssert(true) +} + +func TestDb_Empty(t *testing.T) { + trun(t, func(h *dbHarness) { + h.get("foo", false) + + h.reopenDB() + h.get("foo", false) + }) +} + +func TestDb_ReadWrite(t *testing.T) { + trun(t, func(h *dbHarness) { + h.put("foo", "v1") + h.getVal("foo", "v1") + h.put("bar", "v2") + h.put("foo", "v3") + h.getVal("foo", "v3") + h.getVal("bar", "v2") + + h.reopenDB() + h.getVal("foo", "v3") + h.getVal("bar", "v2") + }) +} + +func TestDb_PutDeleteGet(t *testing.T) { + trun(t, func(h *dbHarness) { + h.put("foo", "v1") + h.getVal("foo", "v1") + h.put("foo", "v2") + h.getVal("foo", "v2") + h.delete("foo") + h.get("foo", false) + + h.reopenDB() + h.get("foo", false) + }) +} + +func TestDb_EmptyBatch(t *testing.T) { + h := newDbHarness(t) + defer h.close() + + h.get("foo", false) + err := h.db.Write(new(Batch), h.wo) + if err != nil { + t.Error("writing empty batch yield error: ", err) + } + h.get("foo", false) +} + +func TestDb_GetFromFrozen(t *testing.T) { + h := newDbHarnessWopt(t, &opt.Options{WriteBuffer: 100100}) + defer h.close() + + h.put("foo", "v1") + h.getVal("foo", "v1") + + h.stor.DelaySync(storage.TypeTable) // Block sync calls + h.put("k1", strings.Repeat("x", 100000)) // Fill memtable + h.put("k2", strings.Repeat("y", 100000)) // Trigger compaction + for i := 0; h.db.getFrozenMem() == nil && i < 100; i++ { + time.Sleep(10 * time.Microsecond) + } + if h.db.getFrozenMem() == nil { + h.stor.ReleaseSync(storage.TypeTable) + t.Fatal("No frozen mem") + } + h.getVal("foo", "v1") + h.stor.ReleaseSync(storage.TypeTable) // Release sync calls + + h.reopenDB() + h.getVal("foo", "v1") + h.get("k1", true) + h.get("k2", true) +} + +func TestDb_GetFromTable(t *testing.T) { + trun(t, func(h *dbHarness) { + h.put("foo", "v1") + h.compactMem() + h.getVal("foo", "v1") + }) +} + +func TestDb_GetSnapshot(t *testing.T) { + trun(t, func(h *dbHarness) { + bar := strings.Repeat("b", 200) + h.put("foo", "v1") + h.put(bar, "v1") + + snap, err := h.db.GetSnapshot() + if err != nil { + t.Fatal("GetSnapshot: got error: ", err) + } + + h.put("foo", "v2") + h.put(bar, "v2") + + h.getVal("foo", "v2") + h.getVal(bar, "v2") + h.getValr(snap, "foo", "v1") + h.getValr(snap, bar, "v1") + + h.compactMem() + + h.getVal("foo", "v2") + h.getVal(bar, "v2") + h.getValr(snap, "foo", "v1") + h.getValr(snap, bar, "v1") + + snap.Release() + + h.reopenDB() + h.getVal("foo", "v2") + h.getVal(bar, "v2") + }) +} + +func TestDb_GetLevel0Ordering(t *testing.T) { + trun(t, func(h *dbHarness) { + for i := 0; i < 4; i++ { + h.put("bar", fmt.Sprintf("b%d", i)) + h.put("foo", fmt.Sprintf("v%d", i)) + h.compactMem() + } + h.getVal("foo", "v3") + h.getVal("bar", "b3") + + v := h.db.s.version() + t0len := v.tLen(0) + v.release() + if t0len < 2 { + t.Errorf("level-0 tables is less than 2, got %d", t0len) + } + + h.reopenDB() + h.getVal("foo", "v3") + h.getVal("bar", "b3") + }) +} + +func TestDb_GetOrderedByLevels(t *testing.T) { + trun(t, func(h *dbHarness) { + h.put("foo", "v1") + h.compactMem() + h.compactRange("a", "z") + h.getVal("foo", "v1") + h.put("foo", "v2") + h.compactMem() + h.getVal("foo", "v2") + }) +} + +func TestDb_GetPicksCorrectFile(t *testing.T) { + trun(t, func(h *dbHarness) { + // Arrange to have multiple files in a non-level-0 level. + h.put("a", "va") + h.compactMem() + h.compactRange("a", "b") + h.put("x", "vx") + h.compactMem() + h.compactRange("x", "y") + h.put("f", "vf") + h.compactMem() + h.compactRange("f", "g") + + h.getVal("a", "va") + h.getVal("f", "vf") + h.getVal("x", "vx") + + h.compactRange("", "") + h.getVal("a", "va") + h.getVal("f", "vf") + h.getVal("x", "vx") + }) +} + +func TestDb_GetEncountersEmptyLevel(t *testing.T) { + trun(t, func(h *dbHarness) { + // Arrange for the following to happen: + // * sstable A in level 0 + // * nothing in level 1 + // * sstable B in level 2 + // Then do enough Get() calls to arrange for an automatic compaction + // of sstable A. A bug would cause the compaction to be marked as + // occuring at level 1 (instead of the correct level 0). + + // Step 1: First place sstables in levels 0 and 2 + for i := 0; ; i++ { + if i >= 100 { + t.Fatal("could not fill levels-0 and level-2") + } + v := h.db.s.version() + if v.tLen(0) > 0 && v.tLen(2) > 0 { + v.release() + break + } + v.release() + h.put("a", "begin") + h.put("z", "end") + h.compactMem() + + h.getVal("a", "begin") + h.getVal("z", "end") + } + + // Step 2: clear level 1 if necessary. + h.compactRangeAt(1, "", "") + h.tablesPerLevel("1,0,1") + + h.getVal("a", "begin") + h.getVal("z", "end") + + // Step 3: read a bunch of times + for i := 0; i < 200; i++ { + h.get("missing", false) + } + + // Step 4: Wait for compaction to finish + h.waitCompaction() + + v := h.db.s.version() + if v.tLen(0) > 0 { + t.Errorf("level-0 tables more than 0, got %d", v.tLen(0)) + } + v.release() + + h.getVal("a", "begin") + h.getVal("z", "end") + }) +} + +func TestDb_IterMultiWithDelete(t *testing.T) { + trun(t, func(h *dbHarness) { + h.put("a", "va") + h.put("b", "vb") + h.put("c", "vc") + h.delete("b") + h.get("b", false) + + iter := h.db.NewIterator(nil, nil) + iter.Seek([]byte("c")) + testKeyVal(t, iter, "c->vc") + iter.Prev() + testKeyVal(t, iter, "a->va") + iter.Release() + + h.compactMem() + + iter = h.db.NewIterator(nil, nil) + iter.Seek([]byte("c")) + testKeyVal(t, iter, "c->vc") + iter.Prev() + testKeyVal(t, iter, "a->va") + iter.Release() + }) +} + +func TestDb_IteratorPinsRef(t *testing.T) { + h := newDbHarness(t) + defer h.close() + + h.put("foo", "hello") + + // Get iterator that will yield the current contents of the DB. + iter := h.db.NewIterator(nil, nil) + + // Write to force compactions + h.put("foo", "newvalue1") + for i := 0; i < 100; i++ { + h.put(numKey(i), strings.Repeat(fmt.Sprintf("v%09d", i), 100000/10)) + } + h.put("foo", "newvalue2") + + iter.First() + testKeyVal(t, iter, "foo->hello") + if iter.Next() { + t.Errorf("expect eof") + } + iter.Release() +} + +func TestDb_Recover(t *testing.T) { + trun(t, func(h *dbHarness) { + h.put("foo", "v1") + h.put("baz", "v5") + + h.reopenDB() + h.getVal("foo", "v1") + + h.getVal("foo", "v1") + h.getVal("baz", "v5") + h.put("bar", "v2") + h.put("foo", "v3") + + h.reopenDB() + h.getVal("foo", "v3") + h.put("foo", "v4") + h.getVal("foo", "v4") + h.getVal("bar", "v2") + h.getVal("baz", "v5") + }) +} + +func TestDb_RecoverWithEmptyJournal(t *testing.T) { + trun(t, func(h *dbHarness) { + h.put("foo", "v1") + h.put("foo", "v2") + + h.reopenDB() + h.reopenDB() + h.put("foo", "v3") + + h.reopenDB() + h.getVal("foo", "v3") + }) +} + +func TestDb_RecoverDuringMemtableCompaction(t *testing.T) { + truno(t, &opt.Options{WriteBuffer: 1000000}, func(h *dbHarness) { + + h.stor.DelaySync(storage.TypeTable) + h.put("big1", strings.Repeat("x", 10000000)) + h.put("big2", strings.Repeat("y", 1000)) + h.put("bar", "v2") + h.stor.ReleaseSync(storage.TypeTable) + + h.reopenDB() + h.getVal("bar", "v2") + h.getVal("big1", strings.Repeat("x", 10000000)) + h.getVal("big2", strings.Repeat("y", 1000)) + }) +} + +func TestDb_MinorCompactionsHappen(t *testing.T) { + h := newDbHarnessWopt(t, &opt.Options{WriteBuffer: 10000}) + defer h.close() + + n := 500 + + key := func(i int) string { + return fmt.Sprintf("key%06d", i) + } + + for i := 0; i < n; i++ { + h.put(key(i), key(i)+strings.Repeat("v", 1000)) + } + + for i := 0; i < n; i++ { + h.getVal(key(i), key(i)+strings.Repeat("v", 1000)) + } + + h.reopenDB() + for i := 0; i < n; i++ { + h.getVal(key(i), key(i)+strings.Repeat("v", 1000)) + } +} + +func TestDb_RecoverWithLargeJournal(t *testing.T) { + h := newDbHarness(t) + defer h.close() + + h.put("big1", strings.Repeat("1", 200000)) + h.put("big2", strings.Repeat("2", 200000)) + h.put("small3", strings.Repeat("3", 10)) + h.put("small4", strings.Repeat("4", 10)) + h.tablesPerLevel("") + + // Make sure that if we re-open with a small write buffer size that + // we flush table files in the middle of a large journal file. + h.o.WriteBuffer = 100000 + h.reopenDB() + h.getVal("big1", strings.Repeat("1", 200000)) + h.getVal("big2", strings.Repeat("2", 200000)) + h.getVal("small3", strings.Repeat("3", 10)) + h.getVal("small4", strings.Repeat("4", 10)) + v := h.db.s.version() + if v.tLen(0) <= 1 { + t.Errorf("tables-0 less than one") + } + v.release() +} + +func TestDb_CompactionsGenerateMultipleFiles(t *testing.T) { + h := newDbHarnessWopt(t, &opt.Options{ + WriteBuffer: 10000000, + Compression: opt.NoCompression, + }) + defer h.close() + + v := h.db.s.version() + if v.tLen(0) > 0 { + t.Errorf("level-0 tables more than 0, got %d", v.tLen(0)) + } + v.release() + + n := 80 + + // Write 8MB (80 values, each 100K) + for i := 0; i < n; i++ { + h.put(numKey(i), strings.Repeat(fmt.Sprintf("v%09d", i), 100000/10)) + } + + // Reopening moves updates to level-0 + h.reopenDB() + h.compactRangeAt(0, "", "") + + v = h.db.s.version() + if v.tLen(0) > 0 { + t.Errorf("level-0 tables more than 0, got %d", v.tLen(0)) + } + if v.tLen(1) <= 1 { + t.Errorf("level-1 tables less than 1, got %d", v.tLen(1)) + } + v.release() + + for i := 0; i < n; i++ { + h.getVal(numKey(i), strings.Repeat(fmt.Sprintf("v%09d", i), 100000/10)) + } +} + +func TestDb_RepeatedWritesToSameKey(t *testing.T) { + h := newDbHarnessWopt(t, &opt.Options{WriteBuffer: 100000}) + defer h.close() + + maxTables := kNumLevels + kL0_StopWritesTrigger + + value := strings.Repeat("v", 2*h.o.GetWriteBuffer()) + for i := 0; i < 5*maxTables; i++ { + h.put("key", value) + n := h.totalTables() + if n > maxTables { + t.Errorf("total tables exceed %d, got=%d, iter=%d", maxTables, n, i) + } + } +} + +func TestDb_RepeatedWritesToSameKeyAfterReopen(t *testing.T) { + h := newDbHarnessWopt(t, &opt.Options{WriteBuffer: 100000}) + defer h.close() + + h.reopenDB() + + maxTables := kNumLevels + kL0_StopWritesTrigger + + value := strings.Repeat("v", 2*h.o.GetWriteBuffer()) + for i := 0; i < 5*maxTables; i++ { + h.put("key", value) + n := h.totalTables() + if n > maxTables { + t.Errorf("total tables exceed %d, got=%d, iter=%d", maxTables, n, i) + } + } +} + +func TestDb_SparseMerge(t *testing.T) { + h := newDbHarnessWopt(t, &opt.Options{Compression: opt.NoCompression}) + defer h.close() + + h.putMulti(kNumLevels, "A", "Z") + + // Suppose there is: + // small amount of data with prefix A + // large amount of data with prefix B + // small amount of data with prefix C + // and that recent updates have made small changes to all three prefixes. + // Check that we do not do a compaction that merges all of B in one shot. + h.put("A", "va") + value := strings.Repeat("x", 1000) + for i := 0; i < 100000; i++ { + h.put(fmt.Sprintf("B%010d", i), value) + } + h.put("C", "vc") + h.compactMem() + h.compactRangeAt(0, "", "") + h.waitCompaction() + + // Make sparse update + h.put("A", "va2") + h.put("B100", "bvalue2") + h.put("C", "vc2") + h.compactMem() + + h.maxNextLevelOverlappingBytes(20 * 1048576) + h.compactRangeAt(0, "", "") + h.waitCompaction() + h.maxNextLevelOverlappingBytes(20 * 1048576) + h.compactRangeAt(1, "", "") + h.waitCompaction() + h.maxNextLevelOverlappingBytes(20 * 1048576) +} + +func TestDb_SizeOf(t *testing.T) { + h := newDbHarnessWopt(t, &opt.Options{ + Compression: opt.NoCompression, + WriteBuffer: 10000000, + }) + defer h.close() + + h.sizeAssert("", "xyz", 0, 0) + h.reopenDB() + h.sizeAssert("", "xyz", 0, 0) + + // Write 8MB (80 values, each 100K) + n := 80 + s1 := 100000 + s2 := 105000 + + for i := 0; i < n; i++ { + h.put(numKey(i), strings.Repeat(fmt.Sprintf("v%09d", i), s1/10)) + } + + // 0 because SizeOf() does not account for memtable space + h.sizeAssert("", numKey(50), 0, 0) + + for r := 0; r < 3; r++ { + h.reopenDB() + + for cs := 0; cs < n; cs += 10 { + for i := 0; i < n; i += 10 { + h.sizeAssert("", numKey(i), uint64(s1*i), uint64(s2*i)) + h.sizeAssert("", numKey(i)+".suffix", uint64(s1*(i+1)), uint64(s2*(i+1))) + h.sizeAssert(numKey(i), numKey(i+10), uint64(s1*10), uint64(s2*10)) + } + + h.sizeAssert("", numKey(50), uint64(s1*50), uint64(s2*50)) + h.sizeAssert("", numKey(50)+".suffix", uint64(s1*50), uint64(s2*50)) + + h.compactRangeAt(0, numKey(cs), numKey(cs+9)) + } + + v := h.db.s.version() + if v.tLen(0) != 0 { + t.Errorf("level-0 tables was not zero, got %d", v.tLen(0)) + } + if v.tLen(1) == 0 { + t.Error("level-1 tables was zero") + } + v.release() + } +} + +func TestDb_SizeOf_MixOfSmallAndLarge(t *testing.T) { + h := newDbHarnessWopt(t, &opt.Options{Compression: opt.NoCompression}) + defer h.close() + + sizes := []uint64{ + 10000, + 10000, + 100000, + 10000, + 100000, + 10000, + 300000, + 10000, + } + + for i, n := range sizes { + h.put(numKey(i), strings.Repeat(fmt.Sprintf("v%09d", i), int(n)/10)) + } + + for r := 0; r < 3; r++ { + h.reopenDB() + + var x uint64 + for i, n := range sizes { + y := x + if i > 0 { + y += 1000 + } + h.sizeAssert("", numKey(i), x, y) + x += n + } + + h.sizeAssert(numKey(3), numKey(5), 110000, 111000) + + h.compactRangeAt(0, "", "") + } +} + +func TestDb_Snapshot(t *testing.T) { + trun(t, func(h *dbHarness) { + h.put("foo", "v1") + s1 := h.getSnapshot() + h.put("foo", "v2") + s2 := h.getSnapshot() + h.put("foo", "v3") + s3 := h.getSnapshot() + h.put("foo", "v4") + + h.getValr(s1, "foo", "v1") + h.getValr(s2, "foo", "v2") + h.getValr(s3, "foo", "v3") + h.getVal("foo", "v4") + + s3.Release() + h.getValr(s1, "foo", "v1") + h.getValr(s2, "foo", "v2") + h.getVal("foo", "v4") + + s1.Release() + h.getValr(s2, "foo", "v2") + h.getVal("foo", "v4") + + s2.Release() + h.getVal("foo", "v4") + }) +} + +func TestDb_HiddenValuesAreRemoved(t *testing.T) { + trun(t, func(h *dbHarness) { + s := h.db.s + + h.put("foo", "v1") + h.compactMem() + m := kMaxMemCompactLevel + v := s.version() + num := v.tLen(m) + v.release() + if num != 1 { + t.Errorf("invalid level-%d len, want=1 got=%d", m, num) + } + + // Place a table at level last-1 to prevent merging with preceding mutation + h.put("a", "begin") + h.put("z", "end") + h.compactMem() + v = s.version() + if v.tLen(m) != 1 { + t.Errorf("invalid level-%d len, want=1 got=%d", m, v.tLen(m)) + } + if v.tLen(m-1) != 1 { + t.Errorf("invalid level-%d len, want=1 got=%d", m-1, v.tLen(m-1)) + } + v.release() + + h.delete("foo") + h.put("foo", "v2") + h.allEntriesFor("foo", "[ v2, DEL, v1 ]") + h.compactMem() + h.allEntriesFor("foo", "[ v2, DEL, v1 ]") + h.compactRangeAt(m-2, "", "z") + // DEL eliminated, but v1 remains because we aren't compacting that level + // (DEL can be eliminated because v2 hides v1). + h.allEntriesFor("foo", "[ v2, v1 ]") + h.compactRangeAt(m-1, "", "") + // Merging last-1 w/ last, so we are the base level for "foo", so + // DEL is removed. (as is v1). + h.allEntriesFor("foo", "[ v2 ]") + }) +} + +func TestDb_DeletionMarkers2(t *testing.T) { + h := newDbHarness(t) + defer h.close() + s := h.db.s + + h.put("foo", "v1") + h.compactMem() + m := kMaxMemCompactLevel + v := s.version() + num := v.tLen(m) + v.release() + if num != 1 { + t.Errorf("invalid level-%d len, want=1 got=%d", m, num) + } + + // Place a table at level last-1 to prevent merging with preceding mutation + h.put("a", "begin") + h.put("z", "end") + h.compactMem() + v = s.version() + if v.tLen(m) != 1 { + t.Errorf("invalid level-%d len, want=1 got=%d", m, v.tLen(m)) + } + if v.tLen(m-1) != 1 { + t.Errorf("invalid level-%d len, want=1 got=%d", m-1, v.tLen(m-1)) + } + v.release() + + h.delete("foo") + h.allEntriesFor("foo", "[ DEL, v1 ]") + h.compactMem() // Moves to level last-2 + h.allEntriesFor("foo", "[ DEL, v1 ]") + h.compactRangeAt(m-2, "", "") + // DEL kept: "last" file overlaps + h.allEntriesFor("foo", "[ DEL, v1 ]") + h.compactRangeAt(m-1, "", "") + // Merging last-1 w/ last, so we are the base level for "foo", so + // DEL is removed. (as is v1). + h.allEntriesFor("foo", "[ ]") +} + +func TestDb_CompactionTableOpenError(t *testing.T) { + h := newDbHarnessWopt(t, &opt.Options{MaxOpenFiles: 0}) + defer h.close() + + im := 10 + jm := 10 + for r := 0; r < 2; r++ { + for i := 0; i < im; i++ { + for j := 0; j < jm; j++ { + h.put(fmt.Sprintf("k%d,%d", i, j), fmt.Sprintf("v%d,%d", i, j)) + } + h.compactMem() + } + } + + if n := h.totalTables(); n != im*2 { + t.Errorf("total tables is %d, want %d", n, im) + } + + h.stor.SetOpenErr(storage.TypeTable) + go h.db.CompactRange(util.Range{}) + if err := h.db.compSendIdle(h.db.tcompCmdC); err != nil { + t.Log("compaction error: ", err) + } + h.closeDB0() + h.openDB() + h.stor.SetOpenErr(0) + + for i := 0; i < im; i++ { + for j := 0; j < jm; j++ { + h.getVal(fmt.Sprintf("k%d,%d", i, j), fmt.Sprintf("v%d,%d", i, j)) + } + } +} + +func TestDb_OverlapInLevel0(t *testing.T) { + trun(t, func(h *dbHarness) { + if kMaxMemCompactLevel != 2 { + t.Fatal("fix test to reflect the config") + } + + // Fill levels 1 and 2 to disable the pushing of new memtables to levels > 0. + h.put("100", "v100") + h.put("999", "v999") + h.compactMem() + h.delete("100") + h.delete("999") + h.compactMem() + h.tablesPerLevel("0,1,1") + + // Make files spanning the following ranges in level-0: + // files[0] 200 .. 900 + // files[1] 300 .. 500 + // Note that files are sorted by min key. + h.put("300", "v300") + h.put("500", "v500") + h.compactMem() + h.put("200", "v200") + h.put("600", "v600") + h.put("900", "v900") + h.compactMem() + h.tablesPerLevel("2,1,1") + + // Compact away the placeholder files we created initially + h.compactRangeAt(1, "", "") + h.compactRangeAt(2, "", "") + h.tablesPerLevel("2") + + // Do a memtable compaction. Before bug-fix, the compaction would + // not detect the overlap with level-0 files and would incorrectly place + // the deletion in a deeper level. + h.delete("600") + h.compactMem() + h.tablesPerLevel("3") + h.get("600", false) + }) +} + +func TestDb_L0_CompactionBug_Issue44_a(t *testing.T) { + h := newDbHarness(t) + defer h.close() + + h.reopenDB() + h.put("b", "v") + h.reopenDB() + h.delete("b") + h.delete("a") + h.reopenDB() + h.delete("a") + h.reopenDB() + h.put("a", "v") + h.reopenDB() + h.reopenDB() + h.getKeyVal("(a->v)") + h.waitCompaction() + h.getKeyVal("(a->v)") +} + +func TestDb_L0_CompactionBug_Issue44_b(t *testing.T) { + h := newDbHarness(t) + defer h.close() + + h.reopenDB() + h.put("", "") + h.reopenDB() + h.delete("e") + h.put("", "") + h.reopenDB() + h.put("c", "cv") + h.reopenDB() + h.put("", "") + h.reopenDB() + h.put("", "") + h.waitCompaction() + h.reopenDB() + h.put("d", "dv") + h.reopenDB() + h.put("", "") + h.reopenDB() + h.delete("d") + h.delete("b") + h.reopenDB() + h.getKeyVal("(->)(c->cv)") + h.waitCompaction() + h.getKeyVal("(->)(c->cv)") +} + +func TestDb_SingleEntryMemCompaction(t *testing.T) { + trun(t, func(h *dbHarness) { + for i := 0; i < 10; i++ { + h.put("big", strings.Repeat("v", opt.DefaultWriteBuffer)) + h.compactMem() + h.put("key", strings.Repeat("v", opt.DefaultBlockSize)) + h.compactMem() + h.put("k", "v") + h.compactMem() + h.put("", "") + h.compactMem() + h.put("verybig", strings.Repeat("v", opt.DefaultWriteBuffer*2)) + h.compactMem() + } + }) +} + +func TestDb_ManifestWriteError(t *testing.T) { + for i := 0; i < 2; i++ { + func() { + h := newDbHarness(t) + defer h.close() + + h.put("foo", "bar") + h.getVal("foo", "bar") + + // Mem compaction (will succeed) + h.compactMem() + h.getVal("foo", "bar") + v := h.db.s.version() + if n := v.tLen(kMaxMemCompactLevel); n != 1 { + t.Errorf("invalid total tables, want=1 got=%d", n) + } + v.release() + + if i == 0 { + h.stor.SetWriteErr(storage.TypeManifest) + } else { + h.stor.SetSyncErr(storage.TypeManifest) + } + + // Merging compaction (will fail) + h.compactRangeAtErr(kMaxMemCompactLevel, "", "", true) + + h.db.Close() + h.stor.SetWriteErr(0) + h.stor.SetSyncErr(0) + + // Should not lose data + h.openDB() + h.getVal("foo", "bar") + }() + } +} + +func assertErr(t *testing.T, err error, wanterr bool) { + if err != nil { + if wanterr { + t.Log("AssertErr: got error (expected): ", err) + } else { + t.Error("AssertErr: got error: ", err) + } + } else if wanterr { + t.Error("AssertErr: expect error") + } +} + +func TestDb_ClosedIsClosed(t *testing.T) { + h := newDbHarness(t) + db := h.db + + var iter, iter2 iterator.Iterator + var snap *Snapshot + func() { + defer h.close() + + h.put("k", "v") + h.getVal("k", "v") + + iter = db.NewIterator(nil, h.ro) + iter.Seek([]byte("k")) + testKeyVal(t, iter, "k->v") + + var err error + snap, err = db.GetSnapshot() + if err != nil { + t.Fatal("GetSnapshot: got error: ", err) + } + + h.getValr(snap, "k", "v") + + iter2 = snap.NewIterator(nil, h.ro) + iter2.Seek([]byte("k")) + testKeyVal(t, iter2, "k->v") + + h.put("foo", "v2") + h.delete("foo") + + // closing DB + iter.Release() + iter2.Release() + }() + + assertErr(t, db.Put([]byte("x"), []byte("y"), h.wo), true) + _, err := db.Get([]byte("k"), h.ro) + assertErr(t, err, true) + + if iter.Valid() { + t.Errorf("iter.Valid should false") + } + assertErr(t, iter.Error(), false) + testKeyVal(t, iter, "->") + if iter.Seek([]byte("k")) { + t.Errorf("iter.Seek should false") + } + assertErr(t, iter.Error(), true) + + assertErr(t, iter2.Error(), false) + + _, err = snap.Get([]byte("k"), h.ro) + assertErr(t, err, true) + + _, err = db.GetSnapshot() + assertErr(t, err, true) + + iter3 := db.NewIterator(nil, h.ro) + assertErr(t, iter3.Error(), true) + + iter3 = snap.NewIterator(nil, h.ro) + assertErr(t, iter3.Error(), true) + + assertErr(t, db.Delete([]byte("k"), h.wo), true) + + _, err = db.GetProperty("leveldb.stats") + assertErr(t, err, true) + + _, err = db.SizeOf([]util.Range{{[]byte("a"), []byte("z")}}) + assertErr(t, err, true) + + assertErr(t, db.CompactRange(util.Range{}), true) + + assertErr(t, db.Close(), true) +} + +type numberComparer struct{} + +func (numberComparer) num(x []byte) (n int) { + fmt.Sscan(string(x[1:len(x)-1]), &n) + return +} + +func (numberComparer) Name() string { + return "test.NumberComparer" +} + +func (p numberComparer) Compare(a, b []byte) int { + return p.num(a) - p.num(b) +} + +func (numberComparer) Separator(dst, a, b []byte) []byte { return nil } +func (numberComparer) Successor(dst, b []byte) []byte { return nil } + +func TestDb_CustomComparer(t *testing.T) { + h := newDbHarnessWopt(t, &opt.Options{ + Comparer: numberComparer{}, + WriteBuffer: 1000, + }) + defer h.close() + + h.put("[10]", "ten") + h.put("[0x14]", "twenty") + for i := 0; i < 2; i++ { + h.getVal("[10]", "ten") + h.getVal("[0xa]", "ten") + h.getVal("[20]", "twenty") + h.getVal("[0x14]", "twenty") + h.get("[15]", false) + h.get("[0xf]", false) + h.compactMem() + h.compactRange("[0]", "[9999]") + } + + for n := 0; n < 2; n++ { + for i := 0; i < 100; i++ { + v := fmt.Sprintf("[%d]", i*10) + h.put(v, v) + } + h.compactMem() + h.compactRange("[0]", "[1000000]") + } +} + +func TestDb_ManualCompaction(t *testing.T) { + h := newDbHarness(t) + defer h.close() + + if kMaxMemCompactLevel != 2 { + t.Fatal("fix test to reflect the config") + } + + h.putMulti(3, "p", "q") + h.tablesPerLevel("1,1,1") + + // Compaction range falls before files + h.compactRange("", "c") + h.tablesPerLevel("1,1,1") + + // Compaction range falls after files + h.compactRange("r", "z") + h.tablesPerLevel("1,1,1") + + // Compaction range overlaps files + h.compactRange("p1", "p9") + h.tablesPerLevel("0,0,1") + + // Populate a different range + h.putMulti(3, "c", "e") + h.tablesPerLevel("1,1,2") + + // Compact just the new range + h.compactRange("b", "f") + h.tablesPerLevel("0,0,2") + + // Compact all + h.putMulti(1, "a", "z") + h.tablesPerLevel("0,1,2") + h.compactRange("", "") + h.tablesPerLevel("0,0,1") +} + +func TestDb_BloomFilter(t *testing.T) { + h := newDbHarnessWopt(t, &opt.Options{ + BlockCache: opt.NoCache, + Filter: filter.NewBloomFilter(10), + }) + defer h.close() + + key := func(i int) string { + return fmt.Sprintf("key%06d", i) + } + + n := 10000 + + // Populate multiple layers + for i := 0; i < n; i++ { + h.put(key(i), key(i)) + } + h.compactMem() + h.compactRange("a", "z") + for i := 0; i < n; i += 100 { + h.put(key(i), key(i)) + } + h.compactMem() + + // Prevent auto compactions triggered by seeks + h.stor.DelaySync(storage.TypeTable) + + // Lookup present keys. Should rarely read from small sstable. + h.stor.SetReadCounter(storage.TypeTable) + for i := 0; i < n; i++ { + h.getVal(key(i), key(i)) + } + cnt := int(h.stor.ReadCounter()) + t.Logf("lookup of %d present keys yield %d sstable I/O reads", n, cnt) + + if min, max := n, n+2*n/100; cnt < min || cnt > max { + t.Errorf("num of sstable I/O reads of present keys not in range of %d - %d, got %d", min, max, cnt) + } + + // Lookup missing keys. Should rarely read from either sstable. + h.stor.ResetReadCounter() + for i := 0; i < n; i++ { + h.get(key(i)+".missing", false) + } + cnt = int(h.stor.ReadCounter()) + t.Logf("lookup of %d missing keys yield %d sstable I/O reads", n, cnt) + if max := 3 * n / 100; cnt > max { + t.Errorf("num of sstable I/O reads of missing keys was more than %d, got %d", max, cnt) + } + + h.stor.ReleaseSync(storage.TypeTable) +} + +func TestDb_Concurrent(t *testing.T) { + const n, secs, maxkey = 4, 2, 1000 + + runtime.GOMAXPROCS(n) + trun(t, func(h *dbHarness) { + var closeWg sync.WaitGroup + var stop uint32 + var cnt [n]uint32 + + for i := 0; i < n; i++ { + closeWg.Add(1) + go func(i int) { + var put, get, found uint + defer func() { + t.Logf("goroutine %d stopped after %d ops, put=%d get=%d found=%d missing=%d", + i, cnt[i], put, get, found, get-found) + closeWg.Done() + }() + + rnd := rand.New(rand.NewSource(int64(1000 + i))) + for atomic.LoadUint32(&stop) == 0 { + x := cnt[i] + + k := rnd.Intn(maxkey) + kstr := fmt.Sprintf("%016d", k) + + if (rnd.Int() % 2) > 0 { + put++ + h.put(kstr, fmt.Sprintf("%d.%d.%-1000d", k, i, x)) + } else { + get++ + v, err := h.db.Get([]byte(kstr), h.ro) + if err == nil { + found++ + rk, ri, rx := 0, -1, uint32(0) + fmt.Sscanf(string(v), "%d.%d.%d", &rk, &ri, &rx) + if rk != k { + t.Errorf("invalid key want=%d got=%d", k, rk) + } + if ri < 0 || ri >= n { + t.Error("invalid goroutine number: ", ri) + } else { + tx := atomic.LoadUint32(&(cnt[ri])) + if rx > tx { + t.Errorf("invalid seq number, %d > %d ", rx, tx) + } + } + } else if err != ErrNotFound { + t.Error("Get: got error: ", err) + return + } + } + atomic.AddUint32(&cnt[i], 1) + } + }(i) + } + + time.Sleep(secs * time.Second) + atomic.StoreUint32(&stop, 1) + closeWg.Wait() + }) + + runtime.GOMAXPROCS(1) +} + +func TestDb_Concurrent2(t *testing.T) { + const n, n2 = 4, 4000 + + runtime.GOMAXPROCS(n*2 + 2) + truno(t, &opt.Options{WriteBuffer: 30}, func(h *dbHarness) { + var closeWg sync.WaitGroup + var stop uint32 + + for i := 0; i < n; i++ { + closeWg.Add(1) + go func(i int) { + for k := 0; atomic.LoadUint32(&stop) == 0; k++ { + h.put(fmt.Sprintf("k%d", k), fmt.Sprintf("%d.%d.", k, i)+strings.Repeat("x", 10)) + } + closeWg.Done() + }(i) + } + + for i := 0; i < n; i++ { + closeWg.Add(1) + go func(i int) { + for k := 1000000; k < 0 || atomic.LoadUint32(&stop) == 0; k-- { + h.put(fmt.Sprintf("k%d", k), fmt.Sprintf("%d.%d.", k, i)+strings.Repeat("x", 10)) + } + closeWg.Done() + }(i) + } + + cmp := comparer.DefaultComparer + for i := 0; i < n2; i++ { + closeWg.Add(1) + go func(i int) { + it := h.db.NewIterator(nil, nil) + var pk []byte + for it.Next() { + kk := it.Key() + if cmp.Compare(kk, pk) <= 0 { + t.Errorf("iter %d: %q is successor of %q", i, pk, kk) + } + pk = append(pk[:0], kk...) + var k, vk, vi int + if n, err := fmt.Sscanf(string(it.Key()), "k%d", &k); err != nil { + t.Errorf("iter %d: Scanf error on key %q: %v", i, it.Key(), err) + } else if n < 1 { + t.Errorf("iter %d: Cannot parse key %q", i, it.Key()) + } + if n, err := fmt.Sscanf(string(it.Value()), "%d.%d", &vk, &vi); err != nil { + t.Errorf("iter %d: Scanf error on value %q: %v", i, it.Value(), err) + } else if n < 2 { + t.Errorf("iter %d: Cannot parse value %q", i, it.Value()) + } + + if vk != k { + t.Errorf("iter %d: invalid value i=%d, want=%d got=%d", i, vi, k, vk) + } + } + if err := it.Error(); err != nil { + t.Errorf("iter %d: Got error: %v", i, err) + } + it.Release() + closeWg.Done() + }(i) + } + + atomic.StoreUint32(&stop, 1) + closeWg.Wait() + }) + + runtime.GOMAXPROCS(1) +} + +func TestDb_CreateReopenDbOnFile(t *testing.T) { + dbpath := filepath.Join(os.TempDir(), fmt.Sprintf("goleveldbtestCreateReopenDbOnFile-%d", os.Getuid())) + if err := os.RemoveAll(dbpath); err != nil { + t.Fatal("cannot remove old db: ", err) + } + defer os.RemoveAll(dbpath) + + for i := 0; i < 3; i++ { + stor, err := storage.OpenFile(dbpath) + if err != nil { + t.Fatalf("(%d) cannot open storage: %s", i, err) + } + db, err := Open(stor, nil) + if err != nil { + t.Fatalf("(%d) cannot open db: %s", i, err) + } + if err := db.Put([]byte("foo"), []byte("bar"), nil); err != nil { + t.Fatalf("(%d) cannot write to db: %s", i, err) + } + if err := db.Close(); err != nil { + t.Fatalf("(%d) cannot close db: %s", i, err) + } + if err := stor.Close(); err != nil { + t.Fatalf("(%d) cannot close storage: %s", i, err) + } + } +} + +func TestDb_CreateReopenDbOnFile2(t *testing.T) { + dbpath := filepath.Join(os.TempDir(), fmt.Sprintf("goleveldbtestCreateReopenDbOnFile2-%d", os.Getuid())) + if err := os.RemoveAll(dbpath); err != nil { + t.Fatal("cannot remove old db: ", err) + } + defer os.RemoveAll(dbpath) + + for i := 0; i < 3; i++ { + db, err := OpenFile(dbpath, nil) + if err != nil { + t.Fatalf("(%d) cannot open db: %s", i, err) + } + if err := db.Put([]byte("foo"), []byte("bar"), nil); err != nil { + t.Fatalf("(%d) cannot write to db: %s", i, err) + } + if err := db.Close(); err != nil { + t.Fatalf("(%d) cannot close db: %s", i, err) + } + } +} + +func TestDb_DeletionMarkersOnMemdb(t *testing.T) { + h := newDbHarness(t) + defer h.close() + + h.put("foo", "v1") + h.compactMem() + h.delete("foo") + h.get("foo", false) + h.getKeyVal("") +} + +func TestDb_LeveldbIssue178(t *testing.T) { + nKeys := (kMaxTableSize / 30) * 5 + key1 := func(i int) string { + return fmt.Sprintf("my_key_%d", i) + } + key2 := func(i int) string { + return fmt.Sprintf("my_key_%d_xxx", i) + } + + // Disable compression since it affects the creation of layers and the + // code below is trying to test against a very specific scenario. + h := newDbHarnessWopt(t, &opt.Options{Compression: opt.NoCompression}) + defer h.close() + + // Create first key range. + batch := new(Batch) + for i := 0; i < nKeys; i++ { + batch.Put([]byte(key1(i)), []byte("value for range 1 key")) + } + h.write(batch) + + // Create second key range. + batch.Reset() + for i := 0; i < nKeys; i++ { + batch.Put([]byte(key2(i)), []byte("value for range 2 key")) + } + h.write(batch) + + // Delete second key range. + batch.Reset() + for i := 0; i < nKeys; i++ { + batch.Delete([]byte(key2(i))) + } + h.write(batch) + h.waitMemCompaction() + + // Run manual compaction. + h.compactRange(key1(0), key1(nKeys-1)) + + // Checking the keys. + h.assertNumKeys(nKeys) +} + +func TestDb_LeveldbIssue200(t *testing.T) { + h := newDbHarness(t) + defer h.close() + + h.put("1", "b") + h.put("2", "c") + h.put("3", "d") + h.put("4", "e") + h.put("5", "f") + + iter := h.db.NewIterator(nil, h.ro) + + // Add an element that should not be reflected in the iterator. + h.put("25", "cd") + + iter.Seek([]byte("5")) + assertBytes(t, []byte("5"), iter.Key()) + iter.Prev() + assertBytes(t, []byte("4"), iter.Key()) + iter.Prev() + assertBytes(t, []byte("3"), iter.Key()) + iter.Next() + assertBytes(t, []byte("4"), iter.Key()) + iter.Next() + assertBytes(t, []byte("5"), iter.Key()) +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_util.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_util.go new file mode 100644 index 000000000..2db007e27 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_util.go @@ -0,0 +1,95 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "errors" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" + "github.com/syndtr/goleveldb/leveldb/util" +) + +// Reader is the interface that wraps basic Get and NewIterator methods. +// This interface implemented by both DB and Snapshot. +type Reader interface { + Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) + NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator +} + +type Sizes []uint64 + +// Sum returns sum of the sizes. +func (p Sizes) Sum() (n uint64) { + for _, s := range p { + n += s + } + return n +} + +// Check and clean files. +func (d *DB) checkAndCleanFiles() error { + s := d.s + + v := s.version_NB() + tables := make(map[uint64]bool) + for _, tt := range v.tables { + for _, t := range tt { + tables[t.file.Num()] = false + } + } + + ff, err := s.getFiles(storage.TypeAll) + if err != nil { + return err + } + + var nTables int + var rem []storage.File + for _, f := range ff { + keep := true + switch f.Type() { + case storage.TypeManifest: + keep = f.Num() >= s.manifestFile.Num() + case storage.TypeJournal: + if d.frozenJournalFile != nil { + keep = f.Num() >= d.frozenJournalFile.Num() + } else { + keep = f.Num() >= d.journalFile.Num() + } + case storage.TypeTable: + _, keep = tables[f.Num()] + if keep { + tables[f.Num()] = true + nTables++ + } + } + + if !keep { + rem = append(rem, f) + } + } + + if nTables != len(tables) { + for num, present := range tables { + if !present { + s.logf("db@janitor table missing @%d", num) + } + } + return ErrCorrupted{Type: MissingFiles, Err: errors.New("leveldb: table files missing")} + } + + s.logf("db@janitor F·%d G·%d", len(ff), len(rem)) + for _, f := range rem { + s.logf("db@janitor removing %s-%d", f.Type(), f.Num()) + if err := f.Remove(); err != nil { + return err + } + } + return nil +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_write.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_write.go new file mode 100644 index 000000000..4660e840c --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_write.go @@ -0,0 +1,279 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "time" + + "github.com/syndtr/goleveldb/leveldb/memdb" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/util" +) + +func (d *DB) writeJournal(b *Batch) error { + w, err := d.journal.Next() + if err != nil { + return err + } + if _, err := w.Write(b.encode()); err != nil { + return err + } + if err := d.journal.Flush(); err != nil { + return err + } + if b.sync { + return d.journalWriter.Sync() + } + return nil +} + +func (d *DB) jWriter() { + defer d.closeW.Done() + for { + select { + case b := <-d.journalC: + if b != nil { + d.journalAckC <- d.writeJournal(b) + } + case _, _ = <-d.closeC: + return + } + } +} + +func (d *DB) rotateMem(n int) (mem *memdb.DB, err error) { + // Wait for pending memdb compaction. + err = d.compSendIdle(d.mcompCmdC) + if err != nil { + return + } + + // Create new memdb and journal. + mem, err = d.newMem(n) + if err != nil { + return + } + + // Schedule memdb compaction. + d.compTrigger(d.mcompTriggerC) + return +} + +func (d *DB) flush(n int) (mem *memdb.DB, nn int, err error) { + s := d.s + + delayed := false + flush := func() bool { + v := s.version() + defer v.release() + mem = d.getEffectiveMem() + nn = mem.Free() + switch { + case v.tLen(0) >= kL0_SlowdownWritesTrigger && !delayed: + delayed = true + time.Sleep(time.Millisecond) + case nn >= n: + return false + case v.tLen(0) >= kL0_StopWritesTrigger: + delayed = true + err = d.compSendIdle(d.tcompCmdC) + if err != nil { + return false + } + default: + // Allow memdb to grow if it has no entry. + if mem.Len() == 0 { + nn = n + return false + } + mem, err = d.rotateMem(n) + nn = mem.Free() + return false + } + return true + } + start := time.Now() + for flush() { + } + if delayed { + s.logf("db@write delayed T·%v", time.Since(start)) + } + return +} + +// Write apply the given batch to the DB. The batch will be applied +// sequentially. +// +// It is safe to modify the contents of the arguments after Write returns. +func (d *DB) Write(b *Batch, wo *opt.WriteOptions) (err error) { + err = d.ok() + if err != nil || b == nil || b.len() == 0 { + return + } + + b.init(wo.GetSync()) + + // The write happen synchronously. +retry: + select { + case d.writeC <- b: + if <-d.writeMergedC { + return <-d.writeAckC + } + goto retry + case d.writeLockC <- struct{}{}: + case _, _ = <-d.closeC: + return ErrClosed + } + + merged := 0 + defer func() { + <-d.writeLockC + for i := 0; i < merged; i++ { + d.writeAckC <- err + } + }() + + mem, memFree, err := d.flush(b.size()) + if err != nil { + return + } + + // Calculate maximum size of the batch. + m := 1 << 20 + if x := b.size(); x <= 128<<10 { + m = x + (128 << 10) + } + m = minInt(m, memFree) + + // Merge with other batch. +drain: + for b.size() < m && !b.sync { + select { + case nb := <-d.writeC: + if b.size()+nb.size() <= m { + b.append(nb) + d.writeMergedC <- true + merged++ + } else { + d.writeMergedC <- false + break drain + } + default: + break drain + } + } + + // Set batch first seq number relative from last seq. + b.seq = d.seq + 1 + + // Write journal concurrently if it is large enough. + if b.size() >= (128 << 10) { + // Push the write batch to the journal writer + select { + case _, _ = <-d.closeC: + err = ErrClosed + return + case d.journalC <- b: + // Write into memdb + b.memReplay(mem) + } + // Wait for journal writer + select { + case _, _ = <-d.closeC: + err = ErrClosed + return + case err = <-d.journalAckC: + if err != nil { + // Revert memdb if error detected + b.revertMemReplay(mem) + return + } + } + } else { + err = d.writeJournal(b) + if err != nil { + return + } + b.memReplay(mem) + } + + // Set last seq number. + d.addSeq(uint64(b.len())) + + if b.size() >= memFree { + d.rotateMem(0) + } + return +} + +// Put sets the value for the given key. It overwrites any previous value +// for that key; a DB is not a multi-map. +// +// It is safe to modify the contents of the arguments after Put returns. +func (d *DB) Put(key, value []byte, wo *opt.WriteOptions) error { + b := new(Batch) + b.Put(key, value) + return d.Write(b, wo) +} + +// Delete deletes the value for the given key. It returns ErrNotFound if +// the DB does not contain the key. +// +// It is safe to modify the contents of the arguments after Delete returns. +func (d *DB) Delete(key []byte, wo *opt.WriteOptions) error { + b := new(Batch) + b.Delete(key) + return d.Write(b, wo) +} + +func isMemOverlaps(icmp *iComparer, mem *memdb.DB, min, max []byte) bool { + iter := mem.NewIterator(nil) + defer iter.Release() + return (max == nil || (iter.First() && icmp.uCompare(max, iKey(iter.Key()).ukey()) >= 0)) && + (min == nil || (iter.Last() && icmp.uCompare(min, iKey(iter.Key()).ukey()) <= 0)) +} + +// CompactRange compacts the underlying DB for the given key range. +// In particular, deleted and overwritten versions are discarded, +// and the data is rearranged to reduce the cost of operations +// needed to access the data. This operation should typically only +// be invoked by users who understand the underlying implementation. +// +// A nil Range.Start is treated as a key before all keys in the DB. +// And a nil Range.Limit is treated as a key after all keys in the DB. +// Therefore if both is nil then it will compact entire DB. +func (d *DB) CompactRange(r util.Range) error { + if err := d.ok(); err != nil { + return err + } + + select { + case d.writeLockC <- struct{}{}: + case _, _ = <-d.closeC: + return ErrClosed + } + + // Check for overlaps in memdb. + mem := d.getEffectiveMem() + if isMemOverlaps(d.s.icmp, mem, r.Start, r.Limit) { + // Memdb compaction. + if _, err := d.rotateMem(0); err != nil { + <-d.writeLockC + return err + } + <-d.writeLockC + if err := d.compSendIdle(d.mcompCmdC); err != nil { + return err + } + } else { + <-d.writeLockC + } + + // Table compaction. + return d.compSendRange(d.tcompCmdC, -1, r.Start, r.Limit) +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/doc.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/doc.go new file mode 100644 index 000000000..ac9ea3d0c --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/doc.go @@ -0,0 +1,80 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package leveldb provides implementation of LevelDB key/value database. +// +// Create or open a database: +// +// db, err := leveldb.OpenFile("path/to/db", nil) +// ... +// defer db.Close() +// ... +// +// Read or modify the database content: +// +// // Remember that the contents of the returned slice should not be modified. +// data, err := db.Get([]byte("key"), nil) +// ... +// err = db.Put([]byte("key"), []byte("value"), nil) +// ... +// err = db.Delete([]byte("key"), nil) +// ... +// +// Iterate over database content: +// +// iter := db.NewIterator(nil, nil) +// for iter.Next() { +// // Remember that the contents of the returned slice should not be modified, and +// // only valid until the next call to Next. +// key := iter.Key() +// value := iter.Value() +// ... +// } +// iter.Release() +// err = iter.Error() +// ... +// +// Seek-then-Iterate: +// +// iter := db.NewIterator(nil, nil) +// for ok := iter.Seek(key); ok; ok = iter.Next() { +// // Use key/value. +// ... +// } +// iter.Release() +// err = iter.Error() +// ... +// +// Iterate over subset of database content: +// +// iter := db.NewIterator(&util.Range{Start: []byte("foo"), Limit: []byte("xoo")}, nil) +// for iter.Next() { +// // Use key/value. +// ... +// } +// iter.Release() +// err = iter.Error() +// ... +// +// Batch writes: +// +// batch := new(leveldb.Batch) +// batch.Put([]byte("foo"), []byte("value")) +// batch.Put([]byte("bar"), []byte("another value")) +// batch.Delete([]byte("baz")) +// err = db.Write(batch, nil) +// ... +// +// Use bloom filter: +// +// o := &opt.Options{ +// Filter: filter.NewBloomFilter(10), +// } +// db, err := leveldb.OpenFile("path/to/db", o) +// ... +// defer db.Close() +// ... +package leveldb diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/error.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/error.go new file mode 100644 index 000000000..8066bd9a4 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/error.go @@ -0,0 +1,38 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "errors" + + "github.com/syndtr/goleveldb/leveldb/util" +) + +var ( + ErrNotFound = util.ErrNotFound + ErrSnapshotReleased = errors.New("leveldb: snapshot released") + ErrIterReleased = errors.New("leveldb: iterator released") + ErrClosed = errors.New("leveldb: closed") +) + +type CorruptionType int + +const ( + CorruptedManifest CorruptionType = iota + MissingFiles +) + +// ErrCorrupted is the type that wraps errors that indicate corruption in +// the database. +type ErrCorrupted struct { + Type CorruptionType + Err error +} + +func (e ErrCorrupted) Error() string { + return e.Err.Error() +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/external_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/external_test.go new file mode 100644 index 000000000..d7dff04b6 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/external_test.go @@ -0,0 +1,58 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/testutil" +) + +var _ = testutil.Defer(func() { + Describe("Leveldb external", func() { + o := &opt.Options{ + BlockCache: opt.NoCache, + BlockRestartInterval: 5, + BlockSize: 50, + Compression: opt.NoCompression, + MaxOpenFiles: 0, + Strict: opt.StrictAll, + WriteBuffer: 1000, + } + + Describe("write test", func() { + It("should do write correctly", func(done Done) { + db := newTestingDB(o, nil, nil) + t := testutil.DBTesting{ + DB: db, + Deleted: testutil.KeyValue_Generate(nil, 500, 1, 50, 5, 5).Clone(), + } + testutil.DoDBTesting(&t) + db.TestClose() + done <- true + }, 9.0) + }) + + Describe("read test", func() { + testutil.AllKeyValueTesting(nil, func(kv testutil.KeyValue) testutil.DB { + // Building the DB. + db := newTestingDB(o, nil, nil) + kv.IterateShuffled(nil, func(i int, key, value []byte) { + err := db.TestPut(key, value) + Expect(err).NotTo(HaveOccurred()) + }) + testutil.Defer("teardown", func() { + db.TestClose() + }) + + return db + }) + }) + }) +}) diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter.go new file mode 100644 index 000000000..37c1e146b --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter.go @@ -0,0 +1,31 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "github.com/syndtr/goleveldb/leveldb/filter" +) + +type iFilter struct { + filter.Filter +} + +func (f iFilter) Contains(filter, key []byte) bool { + return f.Filter.Contains(filter, iKey(key).ukey()) +} + +func (f iFilter) NewGenerator() filter.FilterGenerator { + return iFilterGenerator{f.Filter.NewGenerator()} +} + +type iFilterGenerator struct { + filter.FilterGenerator +} + +func (g iFilterGenerator) Add(key []byte) { + g.FilterGenerator.Add(iKey(key).ukey()) +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter/bloom.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter/bloom.go new file mode 100644 index 000000000..bab0e9970 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter/bloom.go @@ -0,0 +1,116 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package filter + +import ( + "github.com/syndtr/goleveldb/leveldb/util" +) + +func bloomHash(key []byte) uint32 { + return util.Hash(key, 0xbc9f1d34) +} + +type bloomFilter int + +// The bloom filter serializes its parameters and is backward compatible +// with respect to them. Therefor, its parameters are not added to its +// name. +func (bloomFilter) Name() string { + return "leveldb.BuiltinBloomFilter" +} + +func (f bloomFilter) Contains(filter, key []byte) bool { + nBytes := len(filter) - 1 + if nBytes < 1 { + return false + } + nBits := uint32(nBytes * 8) + + // Use the encoded k so that we can read filters generated by + // bloom filters created using different parameters. + k := filter[nBytes] + if k > 30 { + // Reserved for potentially new encodings for short bloom filters. + // Consider it a match. + return true + } + + kh := bloomHash(key) + delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits + for j := uint8(0); j < k; j++ { + bitpos := kh % nBits + if (uint32(filter[bitpos/8]) & (1 << (bitpos % 8))) == 0 { + return false + } + kh += delta + } + return true +} + +func (f bloomFilter) NewGenerator() FilterGenerator { + // Round down to reduce probing cost a little bit. + k := uint8(f * 69 / 100) // 0.69 =~ ln(2) + if k < 1 { + k = 1 + } else if k > 30 { + k = 30 + } + return &bloomFilterGenerator{ + n: int(f), + k: k, + } +} + +type bloomFilterGenerator struct { + n int + k uint8 + + keyHashes []uint32 +} + +func (g *bloomFilterGenerator) Add(key []byte) { + // Use double-hashing to generate a sequence of hash values. + // See analysis in [Kirsch,Mitzenmacher 2006]. + g.keyHashes = append(g.keyHashes, bloomHash(key)) +} + +func (g *bloomFilterGenerator) Generate(b Buffer) { + // Compute bloom filter size (in both bits and bytes) + nBits := uint32(len(g.keyHashes) * g.n) + // For small n, we can see a very high false positive rate. Fix it + // by enforcing a minimum bloom filter length. + if nBits < 64 { + nBits = 64 + } + nBytes := (nBits + 7) / 8 + nBits = nBytes * 8 + + dest := b.Alloc(int(nBytes) + 1) + dest[nBytes] = g.k + for _, kh := range g.keyHashes { + delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits + for j := uint8(0); j < g.k; j++ { + bitpos := kh % nBits + dest[bitpos/8] |= (1 << (bitpos % 8)) + kh += delta + } + } + + g.keyHashes = g.keyHashes[:0] +} + +// NewBloomFilter creates a new initialized bloom filter for given +// bitsPerKey. +// +// Since bitsPerKey is persisted individually for each bloom filter +// serialization, bloom filters are backwards compatible with respect to +// changing bitsPerKey. This means that no big performance penalty will +// be experienced when changing the parameter. See documentation for +// opt.Options.Filter for more information. +func NewBloomFilter(bitsPerKey int) Filter { + return bloomFilter(bitsPerKey) +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter/bloom_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter/bloom_test.go new file mode 100644 index 000000000..1fb56f071 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter/bloom_test.go @@ -0,0 +1,142 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package filter + +import ( + "encoding/binary" + "github.com/syndtr/goleveldb/leveldb/util" + "testing" +) + +type harness struct { + t *testing.T + + bloom Filter + generator FilterGenerator + filter []byte +} + +func newHarness(t *testing.T) *harness { + bloom := NewBloomFilter(10) + return &harness{ + t: t, + bloom: bloom, + generator: bloom.NewGenerator(), + } +} + +func (h *harness) add(key []byte) { + h.generator.Add(key) +} + +func (h *harness) addNum(key uint32) { + var b [4]byte + binary.LittleEndian.PutUint32(b[:], key) + h.add(b[:]) +} + +func (h *harness) build() { + b := &util.Buffer{} + h.generator.Generate(b) + h.filter = b.Bytes() +} + +func (h *harness) reset() { + h.filter = nil +} + +func (h *harness) filterLen() int { + return len(h.filter) +} + +func (h *harness) assert(key []byte, want, silent bool) bool { + got := h.bloom.Contains(h.filter, key) + if !silent && got != want { + h.t.Errorf("assert on '%v' failed got '%v', want '%v'", key, got, want) + } + return got +} + +func (h *harness) assertNum(key uint32, want, silent bool) bool { + var b [4]byte + binary.LittleEndian.PutUint32(b[:], key) + return h.assert(b[:], want, silent) +} + +func TestBloomFilter_Empty(t *testing.T) { + h := newHarness(t) + h.build() + h.assert([]byte("hello"), false, false) + h.assert([]byte("world"), false, false) +} + +func TestBloomFilter_Small(t *testing.T) { + h := newHarness(t) + h.add([]byte("hello")) + h.add([]byte("world")) + h.build() + h.assert([]byte("hello"), true, false) + h.assert([]byte("world"), true, false) + h.assert([]byte("x"), false, false) + h.assert([]byte("foo"), false, false) +} + +func nextN(n int) int { + switch { + case n < 10: + n += 1 + case n < 100: + n += 10 + case n < 1000: + n += 100 + default: + n += 1000 + } + return n +} + +func TestBloomFilter_VaryingLengths(t *testing.T) { + h := newHarness(t) + var mediocre, good int + for n := 1; n < 10000; n = nextN(n) { + h.reset() + for i := 0; i < n; i++ { + h.addNum(uint32(i)) + } + h.build() + + got := h.filterLen() + want := (n * 10 / 8) + 40 + if got > want { + t.Errorf("filter len test failed, '%d' > '%d'", got, want) + } + + for i := 0; i < n; i++ { + h.assertNum(uint32(i), true, false) + } + + var rate float32 + for i := 0; i < 10000; i++ { + if h.assertNum(uint32(i+1000000000), true, true) { + rate++ + } + } + rate /= 10000 + if rate > 0.02 { + t.Errorf("false positive rate is more than 2%%, got %v, at len %d", rate, n) + } + if rate > 0.0125 { + mediocre++ + } else { + good++ + } + } + t.Logf("false positive rate: %d good, %d mediocre", good, mediocre) + if mediocre > good/5 { + t.Error("mediocre false positive rate is more than expected") + } +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter/filter.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter/filter.go new file mode 100644 index 000000000..7a925c5a8 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter/filter.go @@ -0,0 +1,60 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package filter provides interface and implementation of probabilistic +// data structure. +// +// The filter is resposible for creating small filter from a set of keys. +// These filter will then used to test whether a key is a member of the set. +// In many cases, a filter can cut down the number of disk seeks from a +// handful to a single disk seek per DB.Get call. +package filter + +// Buffer is the interface that wraps basic Alloc, Write and WriteByte methods. +type Buffer interface { + // Alloc allocs n bytes of slice from the buffer. This also advancing + // write offset. + Alloc(n int) []byte + + // Write appends the contents of p to the buffer. + Write(p []byte) (n int, err error) + + // WriteByte appends the byte c to the buffer. + WriteByte(c byte) error +} + +// Filter is the filter. +type Filter interface { + // Name returns the name of this policy. + // + // Note that if the filter encoding changes in an incompatible way, + // the name returned by this method must be changed. Otherwise, old + // incompatible filters may be passed to methods of this type. + Name() string + + // NewGenerator creates a new filter generator. + NewGenerator() FilterGenerator + + // Contains returns true if the filter contains the given key. + // + // The filter are filters generated by the filter generator. + Contains(filter, key []byte) bool +} + +// FilterGenerator is the filter generator. +type FilterGenerator interface { + // Add adds a key to the filter generator. + // + // The key may become invalid after call to this method end, therefor + // key must be copied if implementation require keeping key for later + // use. The key should not modified directly, doing so may cause + // undefined results. + Add(key []byte) + + // Generate generates filters based on keys passed so far. After call + // to Generate the filter generator maybe resetted, depends on implementation. + Generate(b Buffer) +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go new file mode 100644 index 000000000..9b4b72741 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go @@ -0,0 +1,158 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package iterator + +import ( + "github.com/syndtr/goleveldb/leveldb/util" +) + +// BasicArray is the interface that wraps basic Len and Search method. +type BasicArray interface { + // Len returns length of the array. + Len() int + + // Search finds smallest index that point to a key that is greater + // than or equal to the given key. + Search(key []byte) int +} + +// Array is the interface that wraps BasicArray and basic Index method. +type Array interface { + BasicArray + + // Index returns key/value pair with index of i. + Index(i int) (key, value []byte) +} + +// Array is the interface that wraps BasicArray and basic Get method. +type ArrayIndexer interface { + BasicArray + + // Get returns a new data iterator with index of i. + Get(i int) Iterator +} + +type basicArrayIterator struct { + util.BasicReleaser + array BasicArray + pos int +} + +func (i *basicArrayIterator) Valid() bool { + return i.pos >= 0 && i.pos < i.array.Len() +} + +func (i *basicArrayIterator) First() bool { + if i.array.Len() == 0 { + i.pos = -1 + return false + } + i.pos = 0 + return true +} + +func (i *basicArrayIterator) Last() bool { + n := i.array.Len() + if n == 0 { + i.pos = 0 + return false + } + i.pos = n - 1 + return true +} + +func (i *basicArrayIterator) Seek(key []byte) bool { + n := i.array.Len() + if n == 0 { + i.pos = 0 + return false + } + i.pos = i.array.Search(key) + if i.pos >= n { + return false + } + return true +} + +func (i *basicArrayIterator) Next() bool { + i.pos++ + if n := i.array.Len(); i.pos >= n { + i.pos = n + return false + } + return true +} + +func (i *basicArrayIterator) Prev() bool { + i.pos-- + if i.pos < 0 { + i.pos = -1 + return false + } + return true +} + +func (i *basicArrayIterator) Error() error { return nil } + +type arrayIterator struct { + basicArrayIterator + array Array + pos int + key, value []byte +} + +func (i *arrayIterator) updateKV() { + if i.pos == i.basicArrayIterator.pos { + return + } + i.pos = i.basicArrayIterator.pos + if i.Valid() { + i.key, i.value = i.array.Index(i.pos) + } else { + i.key = nil + i.value = nil + } +} + +func (i *arrayIterator) Key() []byte { + i.updateKV() + return i.key +} + +func (i *arrayIterator) Value() []byte { + i.updateKV() + return i.value +} + +type arrayIteratorIndexer struct { + basicArrayIterator + array ArrayIndexer +} + +func (i *arrayIteratorIndexer) Get() Iterator { + if i.Valid() { + return i.array.Get(i.basicArrayIterator.pos) + } + return nil +} + +// NewArrayIterator returns an iterator from the given array. +func NewArrayIterator(array Array) Iterator { + return &arrayIterator{ + basicArrayIterator: basicArrayIterator{array: array, pos: -1}, + array: array, + pos: -1, + } +} + +// NewArrayIndexer returns an index iterator from the given array. +func NewArrayIndexer(array ArrayIndexer) IteratorIndexer { + return &arrayIteratorIndexer{ + basicArrayIterator: basicArrayIterator{array: array, pos: -1}, + array: array, + } +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/array_iter_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/array_iter_test.go new file mode 100644 index 000000000..1ed6d07cb --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/array_iter_test.go @@ -0,0 +1,30 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package iterator_test + +import ( + . "github.com/onsi/ginkgo" + + . "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/testutil" +) + +var _ = testutil.Defer(func() { + Describe("Array iterator", func() { + It("Should iterates and seeks correctly", func() { + // Build key/value. + kv := testutil.KeyValue_Generate(nil, 70, 1, 5, 3, 3) + + // Test the iterator. + t := testutil.IteratorTesting{ + KeyValue: kv.Clone(), + Iter: NewArrayIterator(kv), + } + testutil.DoIteratorTesting(&t) + }) + }) +}) diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go new file mode 100644 index 000000000..1e99a2bf6 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go @@ -0,0 +1,221 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package iterator + +import ( + "github.com/syndtr/goleveldb/leveldb/util" +) + +// IteratorIndexer is the interface that wraps CommonIterator and basic Get +// method. IteratorIndexer provides index for indexed iterator. +type IteratorIndexer interface { + CommonIterator + + // Get returns a new data iterator for the current position, or nil if + // done. + Get() Iterator +} + +type indexedIterator struct { + util.BasicReleaser + index IteratorIndexer + strict bool + strictGet bool + + data Iterator + err error + errf func(err error) +} + +func (i *indexedIterator) setData() { + if i.data != nil { + i.data.Release() + } + i.data = i.index.Get() + if i.strictGet { + if err := i.data.Error(); err != nil { + i.err = err + } + } +} + +func (i *indexedIterator) clearData() { + if i.data != nil { + i.data.Release() + } + i.data = nil +} + +func (i *indexedIterator) dataErr() bool { + if i.errf != nil { + if err := i.data.Error(); err != nil { + i.errf(err) + } + } + if i.strict { + if err := i.data.Error(); err != nil { + i.err = err + return true + } + } + return false +} + +func (i *indexedIterator) Valid() bool { + return i.data != nil && i.data.Valid() +} + +func (i *indexedIterator) First() bool { + if i.err != nil { + return false + } + + if !i.index.First() { + i.clearData() + return false + } + i.setData() + return i.Next() +} + +func (i *indexedIterator) Last() bool { + if i.err != nil { + return false + } + + if !i.index.Last() { + i.clearData() + return false + } + i.setData() + if !i.data.Last() { + if i.dataErr() { + return false + } + i.clearData() + return i.Prev() + } + return true +} + +func (i *indexedIterator) Seek(key []byte) bool { + if i.err != nil { + return false + } + + if !i.index.Seek(key) { + i.clearData() + return false + } + i.setData() + if !i.data.Seek(key) { + if i.dataErr() { + return false + } + i.clearData() + return i.Next() + } + return true +} + +func (i *indexedIterator) Next() bool { + if i.err != nil { + return false + } + + switch { + case i.data != nil && !i.data.Next(): + if i.dataErr() { + return false + } + i.clearData() + fallthrough + case i.data == nil: + if !i.index.Next() { + return false + } + i.setData() + return i.Next() + } + return true +} + +func (i *indexedIterator) Prev() bool { + if i.err != nil { + return false + } + + switch { + case i.data != nil && !i.data.Prev(): + if i.dataErr() { + return false + } + i.clearData() + fallthrough + case i.data == nil: + if !i.index.Prev() { + return false + } + i.setData() + if !i.data.Last() { + if i.dataErr() { + return false + } + i.clearData() + return i.Prev() + } + } + return true +} + +func (i *indexedIterator) Key() []byte { + if i.data == nil { + return nil + } + return i.data.Key() +} + +func (i *indexedIterator) Value() []byte { + if i.data == nil { + return nil + } + return i.data.Value() +} + +func (i *indexedIterator) Release() { + i.clearData() + i.index.Release() + i.BasicReleaser.Release() +} + +func (i *indexedIterator) Error() error { + if i.err != nil { + return i.err + } + if err := i.index.Error(); err != nil { + return err + } + return nil +} + +func (i *indexedIterator) SetErrorCallback(f func(err error)) { + i.errf = f +} + +// NewIndexedIterator returns an indexed iterator. An index is iterator +// that returns another iterator, a data iterator. A data iterator is the +// iterator that contains actual key/value pairs. +// +// If strict is true then error yield by data iterator will halt the indexed +// iterator, on contrary if strict is false then the indexed iterator will +// ignore those error and move on to the next index. If strictGet is true and +// index.Get() yield an 'error iterator' then the indexed iterator will be halted. +// An 'error iterator' is iterator which its Error() method always return non-nil +// even before any 'seeks method' is called. +func NewIndexedIterator(index IteratorIndexer, strict, strictGet bool) Iterator { + return &indexedIterator{index: index, strict: strict, strictGet: strictGet} +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter_test.go new file mode 100644 index 000000000..6a89b3830 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter_test.go @@ -0,0 +1,83 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package iterator_test + +import ( + "sort" + + . "github.com/onsi/ginkgo" + + "github.com/syndtr/goleveldb/leveldb/comparer" + . "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/testutil" +) + +type keyValue struct { + key []byte + testutil.KeyValue +} + +type keyValueIndex []keyValue + +func (x keyValueIndex) Search(key []byte) int { + return sort.Search(x.Len(), func(i int) bool { + return comparer.DefaultComparer.Compare(x[i].key, key) >= 0 + }) +} + +func (x keyValueIndex) Len() int { return len(x) } +func (x keyValueIndex) Index(i int) (key, value []byte) { return x[i].key, nil } +func (x keyValueIndex) Get(i int) Iterator { return NewArrayIterator(x[i]) } + +var _ = testutil.Defer(func() { + Describe("Indexed iterator", func() { + Test := func(n ...int) func() { + if len(n) == 0 { + rnd := testutil.NewRand() + n = make([]int, rnd.Intn(17)+3) + for i := range n { + n[i] = rnd.Intn(19) + 1 + } + } + + return func() { + It("Should iterates and seeks correctly", func(done Done) { + // Build key/value. + index := make(keyValueIndex, len(n)) + sum := 0 + for _, x := range n { + sum += x + } + kv := testutil.KeyValue_Generate(nil, sum, 1, 10, 4, 4) + for i, j := 0, 0; i < len(n); i++ { + for x := n[i]; x > 0; x-- { + key, value := kv.Index(j) + index[i].key = key + index[i].Put(key, value) + j++ + } + } + + // Test the iterator. + t := testutil.IteratorTesting{ + KeyValue: kv.Clone(), + Iter: NewIndexedIterator(NewArrayIndexer(index), true, true), + } + testutil.DoIteratorTesting(&t) + done <- true + }, 1.5) + } + } + + Describe("with 100 keys", Test(100)) + Describe("with 50-50 keys", Test(50, 50)) + Describe("with 50-1 keys", Test(50, 1)) + Describe("with 50-1-50 keys", Test(50, 1, 50)) + Describe("with 1-50 keys", Test(1, 50)) + Describe("with random N-keys", Test()) + }) +}) diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter.go new file mode 100644 index 000000000..1b80184e8 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter.go @@ -0,0 +1,142 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package iterator provides interface and implementation to traverse over +// contents of a database. +package iterator + +import ( + "errors" + + "github.com/syndtr/goleveldb/leveldb/util" +) + +// IteratorSeeker is the interface that wraps the 'seeks method'. +type IteratorSeeker interface { + // First moves the iterator to the first key/value pair. If the iterator + // only contains one key/value pair then First and Last whould moves + // to the same key/value pair. + // It returns whether such pair exist. + First() bool + + // Last moves the iterator to the last key/value pair. If the iterator + // only contains one key/value pair then First and Last whould moves + // to the same key/value pair. + // It returns whether such pair exist. + Last() bool + + // Seek moves the iterator to the first key/value pair whose key is greater + // than or equal to the given key. + // It returns whether such pair exist. + // + // It is safe to modify the contents of the argument after Seek returns. + Seek(key []byte) bool + + // Next moves the iterator to the next key/value pair. + // It returns whether the iterator is exhausted. + Next() bool + + // Prev moves the iterator to the previous key/value pair. + // It returns whether the iterator is exhausted. + Prev() bool +} + +// CommonIterator is the interface that wraps common interator methods. +type CommonIterator interface { + IteratorSeeker + + // util.Releaser is the interface that wraps basic Release method. + // When called Release will releases any resources associated with the + // iterator. + util.Releaser + + // util.ReleaseSetter is the interface that wraps the basic SetReleaser + // method. + util.ReleaseSetter + + // TODO: Remove this when ready. + Valid() bool + + // Error returns any accumulated error. Exhausting all the key/value pairs + // is not considered to be an error. + Error() error +} + +// Iterator iterates over a DB's key/value pairs in key order. +// +// When encouter an error any 'seeks method' will return false and will +// yield no key/value pairs. The error can be queried by calling the Error +// method. Calling Release is still necessary. +// +// An iterator must be released after use, but it is not necessary to read +// an iterator until exhaustion. +// Also, an iterator is not necessarily goroutine-safe, but it is safe to use +// multiple iterators concurrently, with each in a dedicated goroutine. +type Iterator interface { + CommonIterator + + // Key returns the key of the current key/value pair, or nil if done. + // The caller should not modify the contents of the returned slice, and + // its contents may change on the next call to any 'seeks method'. + Key() []byte + + // Value returns the key of the current key/value pair, or nil if done. + // The caller should not modify the contents of the returned slice, and + // its contents may change on the next call to any 'seeks method'. + Value() []byte +} + +// ErrorCallbackSetter is the interface that wraps basic SetErrorCallback +// method. +// +// ErrorCallbackSetter implemented by indexed and merged iterator. +type ErrorCallbackSetter interface { + // SetErrorCallback allows set an error callback of the coresponding + // iterator. Use nil to clear the callback. + SetErrorCallback(f func(err error)) +} + +type emptyIterator struct { + releaser util.Releaser + released bool + err error +} + +func (i *emptyIterator) rErr() { + if i.err == nil && i.released { + i.err = errors.New("leveldb/iterator: iterator released") + } +} + +func (i *emptyIterator) Release() { + if i.releaser != nil { + i.releaser.Release() + i.releaser = nil + } + i.released = true +} + +func (i *emptyIterator) SetReleaser(releaser util.Releaser) { + if !i.released { + i.releaser = releaser + } +} + +func (*emptyIterator) Valid() bool { return false } +func (i *emptyIterator) First() bool { i.rErr(); return false } +func (i *emptyIterator) Last() bool { i.rErr(); return false } +func (i *emptyIterator) Seek(key []byte) bool { i.rErr(); return false } +func (i *emptyIterator) Next() bool { i.rErr(); return false } +func (i *emptyIterator) Prev() bool { i.rErr(); return false } +func (*emptyIterator) Key() []byte { return nil } +func (*emptyIterator) Value() []byte { return nil } +func (i *emptyIterator) Error() error { return i.err } + +// NewEmptyIterator creates an empty iterator. The err parameter can be +// nil, but if not nil the given err will be returned by Error method. +func NewEmptyIterator(err error) Iterator { + return &emptyIterator{err: err} +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter_suite_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter_suite_test.go new file mode 100644 index 000000000..7ec2fc6f2 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/iter_suite_test.go @@ -0,0 +1,17 @@ +package iterator_test + +import ( + "testing" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + + "github.com/syndtr/goleveldb/leveldb/testutil" +) + +func TestIterator(t *testing.T) { + testutil.RunDefer() + + RegisterFailHandler(Fail) + RunSpecs(t, "Iterator Suite") +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go new file mode 100644 index 000000000..c8314c4e5 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go @@ -0,0 +1,307 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package iterator + +import ( + "errors" + + "github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/syndtr/goleveldb/leveldb/util" +) + +var ( + ErrIterReleased = errors.New("leveldb/iterator: iterator released") +) + +type dir int + +const ( + dirReleased dir = iota - 1 + dirSOI + dirEOI + dirBackward + dirForward +) + +type mergedIterator struct { + cmp comparer.Comparer + iters []Iterator + strict bool + + keys [][]byte + index int + dir dir + err error + errf func(err error) + releaser util.Releaser +} + +func assertKey(key []byte) []byte { + if key == nil { + panic("leveldb/iterator: nil key") + } + return key +} + +func (i *mergedIterator) iterErr(iter Iterator) bool { + if i.errf != nil { + if err := iter.Error(); err != nil { + i.errf(err) + } + } + if i.strict { + if err := iter.Error(); err != nil { + i.err = err + return true + } + } + return false +} + +func (i *mergedIterator) Valid() bool { + return i.err == nil && i.dir > dirEOI +} + +func (i *mergedIterator) First() bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + for x, iter := range i.iters { + switch { + case iter.First(): + i.keys[x] = assertKey(iter.Key()) + case i.iterErr(iter): + return false + default: + i.keys[x] = nil + } + } + i.dir = dirSOI + return i.next() +} + +func (i *mergedIterator) Last() bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + for x, iter := range i.iters { + switch { + case iter.Last(): + i.keys[x] = assertKey(iter.Key()) + case i.iterErr(iter): + return false + default: + i.keys[x] = nil + } + } + i.dir = dirEOI + return i.prev() +} + +func (i *mergedIterator) Seek(key []byte) bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + for x, iter := range i.iters { + switch { + case iter.Seek(key): + i.keys[x] = assertKey(iter.Key()) + case i.iterErr(iter): + return false + default: + i.keys[x] = nil + } + } + i.dir = dirSOI + return i.next() +} + +func (i *mergedIterator) next() bool { + var key []byte + if i.dir == dirForward { + key = i.keys[i.index] + } + for x, tkey := range i.keys { + if tkey != nil && (key == nil || i.cmp.Compare(tkey, key) < 0) { + key = tkey + i.index = x + } + } + if key == nil { + i.dir = dirEOI + return false + } + i.dir = dirForward + return true +} + +func (i *mergedIterator) Next() bool { + if i.dir == dirEOI || i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + switch i.dir { + case dirSOI: + return i.First() + case dirBackward: + key := append([]byte{}, i.keys[i.index]...) + if !i.Seek(key) { + return false + } + return i.Next() + } + + x := i.index + iter := i.iters[x] + switch { + case iter.Next(): + i.keys[x] = assertKey(iter.Key()) + case i.iterErr(iter): + return false + default: + i.keys[x] = nil + } + return i.next() +} + +func (i *mergedIterator) prev() bool { + var key []byte + if i.dir == dirBackward { + key = i.keys[i.index] + } + for x, tkey := range i.keys { + if tkey != nil && (key == nil || i.cmp.Compare(tkey, key) > 0) { + key = tkey + i.index = x + } + } + if key == nil { + i.dir = dirSOI + return false + } + i.dir = dirBackward + return true +} + +func (i *mergedIterator) Prev() bool { + if i.dir == dirSOI || i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + switch i.dir { + case dirEOI: + return i.Last() + case dirForward: + key := append([]byte{}, i.keys[i.index]...) + for x, iter := range i.iters { + if x == i.index { + continue + } + seek := iter.Seek(key) + switch { + case seek && iter.Prev(), !seek && iter.Last(): + i.keys[x] = assertKey(iter.Key()) + case i.iterErr(iter): + return false + default: + i.keys[x] = nil + } + } + } + + x := i.index + iter := i.iters[x] + switch { + case iter.Prev(): + i.keys[x] = assertKey(iter.Key()) + case i.iterErr(iter): + return false + default: + i.keys[x] = nil + } + return i.prev() +} + +func (i *mergedIterator) Key() []byte { + if i.err != nil || i.dir <= dirEOI { + return nil + } + return i.keys[i.index] +} + +func (i *mergedIterator) Value() []byte { + if i.err != nil || i.dir <= dirEOI { + return nil + } + return i.iters[i.index].Value() +} + +func (i *mergedIterator) Release() { + if i.dir != dirReleased { + i.dir = dirReleased + for _, iter := range i.iters { + iter.Release() + } + i.iters = nil + i.keys = nil + if i.releaser != nil { + i.releaser.Release() + i.releaser = nil + } + } +} + +func (i *mergedIterator) SetReleaser(releaser util.Releaser) { + if i.dir != dirReleased { + i.releaser = releaser + } +} + +func (i *mergedIterator) Error() error { + return i.err +} + +func (i *mergedIterator) SetErrorCallback(f func(err error)) { + i.errf = f +} + +// NewMergedIterator returns an iterator that merges its input. Walking the +// resultant iterator will return all key/value pairs of all input iterators +// in strictly increasing key order, as defined by cmp. +// The input's key ranges may overlap, but there are assumed to be no duplicate +// keys: if iters[i] contains a key k then iters[j] will not contain that key k. +// None of the iters may be nil. +// +// If strict is true then error yield by any iterators will halt the merged +// iterator, on contrary if strict is false then the merged iterator will +// ignore those error and move on to the next iterator. +func NewMergedIterator(iters []Iterator, cmp comparer.Comparer, strict bool) Iterator { + return &mergedIterator{ + iters: iters, + cmp: cmp, + strict: strict, + keys: make([][]byte, len(iters)), + } +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter_test.go new file mode 100644 index 000000000..e523b63e4 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter_test.go @@ -0,0 +1,60 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package iterator_test + +import ( + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + + "github.com/syndtr/goleveldb/leveldb/comparer" + . "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/testutil" +) + +var _ = testutil.Defer(func() { + Describe("Merged iterator", func() { + Test := func(filled int, empty int) func() { + return func() { + It("Should iterates and seeks correctly", func(done Done) { + rnd := testutil.NewRand() + + // Build key/value. + filledKV := make([]testutil.KeyValue, filled) + kv := testutil.KeyValue_Generate(nil, 100, 1, 10, 4, 4) + kv.Iterate(func(i int, key, value []byte) { + filledKV[rnd.Intn(filled)].Put(key, value) + }) + + // Create itearators. + iters := make([]Iterator, filled+empty) + for i := range iters { + if empty == 0 || (rnd.Int()%2 == 0 && filled > 0) { + filled-- + Expect(filledKV[filled].Len()).ShouldNot(BeZero()) + iters[i] = NewArrayIterator(filledKV[filled]) + } else { + empty-- + iters[i] = NewEmptyIterator(nil) + } + } + + // Test the iterator. + t := testutil.IteratorTesting{ + KeyValue: kv.Clone(), + Iter: NewMergedIterator(iters, comparer.DefaultComparer, true), + } + testutil.DoIteratorTesting(&t) + done <- true + }, 1.5) + } + } + + Describe("with three, all filled iterators", Test(3, 0)) + Describe("with one filled, one empty iterators", Test(1, 1)) + Describe("with one filled, two empty iterators", Test(1, 2)) + }) +}) diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal/journal.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal/journal.go new file mode 100644 index 000000000..b522c76e6 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal/journal.go @@ -0,0 +1,513 @@ +// Copyright 2011 The LevelDB-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Taken from: https://code.google.com/p/leveldb-go/source/browse/leveldb/record/record.go?r=1d5ccbe03246da926391ee12d1c6caae054ff4b0 +// License, authors and contributors informations can be found at bellow URLs respectively: +// https://code.google.com/p/leveldb-go/source/browse/LICENSE +// https://code.google.com/p/leveldb-go/source/browse/AUTHORS +// https://code.google.com/p/leveldb-go/source/browse/CONTRIBUTORS + +// Package journal reads and writes sequences of journals. Each journal is a stream +// of bytes that completes before the next journal starts. +// +// When reading, call Next to obtain an io.Reader for the next journal. Next will +// return io.EOF when there are no more journals. It is valid to call Next +// without reading the current journal to exhaustion. +// +// When writing, call Next to obtain an io.Writer for the next journal. Calling +// Next finishes the current journal. Call Close to finish the final journal. +// +// Optionally, call Flush to finish the current journal and flush the underlying +// writer without starting a new journal. To start a new journal after flushing, +// call Next. +// +// Neither Readers or Writers are safe to use concurrently. +// +// Example code: +// func read(r io.Reader) ([]string, error) { +// var ss []string +// journals := journal.NewReader(r, nil, true, true) +// for { +// j, err := journals.Next() +// if err == io.EOF { +// break +// } +// if err != nil { +// return nil, err +// } +// s, err := ioutil.ReadAll(j) +// if err != nil { +// return nil, err +// } +// ss = append(ss, string(s)) +// } +// return ss, nil +// } +// +// func write(w io.Writer, ss []string) error { +// journals := journal.NewWriter(w) +// for _, s := range ss { +// j, err := journals.Next() +// if err != nil { +// return err +// } +// if _, err := j.Write([]byte(s)), err != nil { +// return err +// } +// } +// return journals.Close() +// } +// +// The wire format is that the stream is divided into 32KiB blocks, and each +// block contains a number of tightly packed chunks. Chunks cannot cross block +// boundaries. The last block may be shorter than 32 KiB. Any unused bytes in a +// block must be zero. +// +// A journal maps to one or more chunks. Each chunk has a 7 byte header (a 4 +// byte checksum, a 2 byte little-endian uint16 length, and a 1 byte chunk type) +// followed by a payload. The checksum is over the chunk type and the payload. +// +// There are four chunk types: whether the chunk is the full journal, or the +// first, middle or last chunk of a multi-chunk journal. A multi-chunk journal +// has one first chunk, zero or more middle chunks, and one last chunk. +// +// The wire format allows for limited recovery in the face of data corruption: +// on a format error (such as a checksum mismatch), the reader moves to the +// next block and looks for the next full or first chunk. +package journal + +import ( + "encoding/binary" + "errors" + "fmt" + "io" + + "github.com/syndtr/goleveldb/leveldb/util" +) + +// These constants are part of the wire format and should not be changed. +const ( + fullChunkType = 1 + firstChunkType = 2 + middleChunkType = 3 + lastChunkType = 4 +) + +const ( + blockSize = 32 * 1024 + headerSize = 7 +) + +type flusher interface { + Flush() error +} + +// DroppedError is the error type that passed to Dropper.Drop method. +type DroppedError struct { + Size int + Reason string +} + +func (e DroppedError) Error() string { + return fmt.Sprintf("leveldb/journal: dropped %d bytes: %s", e.Size, e.Reason) +} + +// Dropper is the interface that wrap simple Drop method. The Drop +// method will be called when the journal reader dropping a chunk. +type Dropper interface { + Drop(err error) +} + +// Reader reads journals from an underlying io.Reader. +type Reader struct { + // r is the underlying reader. + r io.Reader + // the dropper. + dropper Dropper + // strict flag. + strict bool + // checksum flag. + checksum bool + // seq is the sequence number of the current journal. + seq int + // buf[i:j] is the unread portion of the current chunk's payload. + // The low bound, i, excludes the chunk header. + i, j int + // n is the number of bytes of buf that are valid. Once reading has started, + // only the final block can have n < blockSize. + n int + // last is whether the current chunk is the last chunk of the journal. + last bool + // err is any accumulated error. + err error + // buf is the buffer. + buf [blockSize]byte +} + +// NewReader returns a new reader. The dropper may be nil, and if +// strict is true then corrupted or invalid chunk will halt the journal +// reader entirely. +func NewReader(r io.Reader, dropper Dropper, strict, checksum bool) *Reader { + return &Reader{ + r: r, + dropper: dropper, + strict: strict, + checksum: checksum, + last: true, + } +} + +// nextChunk sets r.buf[r.i:r.j] to hold the next chunk's payload, reading the +// next block into the buffer if necessary. +func (r *Reader) nextChunk(wantFirst, skip bool) error { + for { + if r.j+headerSize <= r.n { + checksum := binary.LittleEndian.Uint32(r.buf[r.j+0 : r.j+4]) + length := binary.LittleEndian.Uint16(r.buf[r.j+4 : r.j+6]) + chunkType := r.buf[r.j+6] + + var err error + if checksum == 0 && length == 0 && chunkType == 0 { + // Drop entire block. + err = DroppedError{r.n - r.j, "zero header"} + r.i = r.n + r.j = r.n + } else { + m := r.n - r.j + r.i = r.j + headerSize + r.j = r.j + headerSize + int(length) + if r.j > r.n { + // Drop entire block. + err = DroppedError{m, "chunk length overflows block"} + r.i = r.n + r.j = r.n + } else if r.checksum && checksum != util.NewCRC(r.buf[r.i-1:r.j]).Value() { + // Drop entire block. + err = DroppedError{m, "checksum mismatch"} + r.i = r.n + r.j = r.n + } + } + if wantFirst && err == nil && chunkType != fullChunkType && chunkType != firstChunkType { + if skip { + // The chunk are intentionally skipped. + if chunkType == lastChunkType { + skip = false + } + continue + } else { + // Drop the chunk. + err = DroppedError{r.j - r.i + headerSize, "orphan chunk"} + } + } + if err == nil { + r.last = chunkType == fullChunkType || chunkType == lastChunkType + } else { + if r.dropper != nil { + r.dropper.Drop(err) + } + if r.strict { + r.err = err + } + } + return err + } + if r.n < blockSize && r.n > 0 { + // This is the last block. + if r.j != r.n { + r.err = io.ErrUnexpectedEOF + } else { + r.err = io.EOF + } + return r.err + } + n, err := io.ReadFull(r.r, r.buf[:]) + if err != nil && err != io.ErrUnexpectedEOF { + r.err = err + return r.err + } + if n == 0 { + r.err = io.EOF + return r.err + } + r.i, r.j, r.n = 0, 0, n + } +} + +// Next returns a reader for the next journal. It returns io.EOF if there are no +// more journals. The reader returned becomes stale after the next Next call, +// and should no longer be used. +func (r *Reader) Next() (io.Reader, error) { + r.seq++ + if r.err != nil { + return nil, r.err + } + skip := !r.last + for { + r.i = r.j + if r.nextChunk(true, skip) != nil { + // So that 'orphan chunk' drop will be reported. + skip = false + } else { + break + } + if r.err != nil { + return nil, r.err + } + } + return &singleReader{r, r.seq, nil}, nil +} + +// Reset resets the journal reader, allows reuse of the journal reader. +func (r *Reader) Reset(reader io.Reader, dropper Dropper, strict, checksum bool) error { + r.seq++ + err := r.err + r.r = reader + r.dropper = dropper + r.strict = strict + r.checksum = checksum + r.i = 0 + r.j = 0 + r.n = 0 + r.last = true + r.err = nil + return err +} + +type singleReader struct { + r *Reader + seq int + err error +} + +func (x *singleReader) Read(p []byte) (int, error) { + r := x.r + if r.seq != x.seq { + return 0, errors.New("leveldb/journal: stale reader") + } + if x.err != nil { + return 0, x.err + } + if r.err != nil { + return 0, r.err + } + for r.i == r.j { + if r.last { + return 0, io.EOF + } + if x.err = r.nextChunk(false, false); x.err != nil { + return 0, x.err + } + } + n := copy(p, r.buf[r.i:r.j]) + r.i += n + return n, nil +} + +func (x *singleReader) ReadByte() (byte, error) { + r := x.r + if r.seq != x.seq { + return 0, errors.New("leveldb/journal: stale reader") + } + if x.err != nil { + return 0, x.err + } + if r.err != nil { + return 0, r.err + } + for r.i == r.j { + if r.last { + return 0, io.EOF + } + if x.err = r.nextChunk(false, false); x.err != nil { + return 0, x.err + } + } + c := r.buf[r.i] + r.i++ + return c, nil +} + +// Writer writes journals to an underlying io.Writer. +type Writer struct { + // w is the underlying writer. + w io.Writer + // seq is the sequence number of the current journal. + seq int + // f is w as a flusher. + f flusher + // buf[i:j] is the bytes that will become the current chunk. + // The low bound, i, includes the chunk header. + i, j int + // buf[:written] has already been written to w. + // written is zero unless Flush has been called. + written int + // first is whether the current chunk is the first chunk of the journal. + first bool + // pending is whether a chunk is buffered but not yet written. + pending bool + // err is any accumulated error. + err error + // buf is the buffer. + buf [blockSize]byte +} + +// NewWriter returns a new Writer. +func NewWriter(w io.Writer) *Writer { + f, _ := w.(flusher) + return &Writer{ + w: w, + f: f, + } +} + +// fillHeader fills in the header for the pending chunk. +func (w *Writer) fillHeader(last bool) { + if w.i+headerSize > w.j || w.j > blockSize { + panic("leveldb/journal: bad writer state") + } + if last { + if w.first { + w.buf[w.i+6] = fullChunkType + } else { + w.buf[w.i+6] = lastChunkType + } + } else { + if w.first { + w.buf[w.i+6] = firstChunkType + } else { + w.buf[w.i+6] = middleChunkType + } + } + binary.LittleEndian.PutUint32(w.buf[w.i+0:w.i+4], util.NewCRC(w.buf[w.i+6:w.j]).Value()) + binary.LittleEndian.PutUint16(w.buf[w.i+4:w.i+6], uint16(w.j-w.i-headerSize)) +} + +// writeBlock writes the buffered block to the underlying writer, and reserves +// space for the next chunk's header. +func (w *Writer) writeBlock() { + _, w.err = w.w.Write(w.buf[w.written:]) + w.i = 0 + w.j = headerSize + w.written = 0 +} + +// writePending finishes the current journal and writes the buffer to the +// underlying writer. +func (w *Writer) writePending() { + if w.err != nil { + return + } + if w.pending { + w.fillHeader(true) + w.pending = false + } + _, w.err = w.w.Write(w.buf[w.written:w.j]) + w.written = w.j +} + +// Close finishes the current journal and closes the writer. +func (w *Writer) Close() error { + w.seq++ + w.writePending() + if w.err != nil { + return w.err + } + w.err = errors.New("leveldb/journal: closed Writer") + return nil +} + +// Flush finishes the current journal, writes to the underlying writer, and +// flushes it if that writer implements interface{ Flush() error }. +func (w *Writer) Flush() error { + w.seq++ + w.writePending() + if w.err != nil { + return w.err + } + if w.f != nil { + w.err = w.f.Flush() + return w.err + } + return nil +} + +// Reset resets the journal writer, allows reuse of the journal writer. Reset +// will also closes the journal writer if not already. +func (w *Writer) Reset(writer io.Writer) (err error) { + w.seq++ + if w.err == nil { + w.writePending() + err = w.err + } + w.w = writer + w.f, _ = writer.(flusher) + w.i = 0 + w.j = 0 + w.written = 0 + w.first = false + w.pending = false + w.err = nil + return +} + +// Next returns a writer for the next journal. The writer returned becomes stale +// after the next Close, Flush or Next call, and should no longer be used. +func (w *Writer) Next() (io.Writer, error) { + w.seq++ + if w.err != nil { + return nil, w.err + } + if w.pending { + w.fillHeader(true) + } + w.i = w.j + w.j = w.j + headerSize + // Check if there is room in the block for the header. + if w.j > blockSize { + // Fill in the rest of the block with zeroes. + for k := w.i; k < blockSize; k++ { + w.buf[k] = 0 + } + w.writeBlock() + if w.err != nil { + return nil, w.err + } + } + w.first = true + w.pending = true + return singleWriter{w, w.seq}, nil +} + +type singleWriter struct { + w *Writer + seq int +} + +func (x singleWriter) Write(p []byte) (int, error) { + w := x.w + if w.seq != x.seq { + return 0, errors.New("leveldb/journal: stale writer") + } + if w.err != nil { + return 0, w.err + } + n0 := len(p) + for len(p) > 0 { + // Write a block, if it is full. + if w.j == blockSize { + w.fillHeader(false) + w.writeBlock() + if w.err != nil { + return 0, w.err + } + w.first = false + } + // Copy bytes into the buffer. + n := copy(w.buf[w.j:], p) + w.j += n + p = p[n:] + } + return n0, nil +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal/journal_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal/journal_test.go new file mode 100644 index 000000000..5e1193ae2 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/journal/journal_test.go @@ -0,0 +1,328 @@ +// Copyright 2011 The LevelDB-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Taken from: https://code.google.com/p/leveldb-go/source/browse/leveldb/record/record_test.go?r=df1fa28f7f3be6c3935548169002309c12967135 +// License, authors and contributors informations can be found at bellow URLs respectively: +// https://code.google.com/p/leveldb-go/source/browse/LICENSE +// https://code.google.com/p/leveldb-go/source/browse/AUTHORS +// https://code.google.com/p/leveldb-go/source/browse/CONTRIBUTORS + +package journal + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "math/rand" + "strings" + "testing" +) + +type dropper struct { + t *testing.T +} + +func (d dropper) Drop(err error) { + d.t.Log(err) +} + +func short(s string) string { + if len(s) < 64 { + return s + } + return fmt.Sprintf("%s...(skipping %d bytes)...%s", s[:20], len(s)-40, s[len(s)-20:]) +} + +// big returns a string of length n, composed of repetitions of partial. +func big(partial string, n int) string { + return strings.Repeat(partial, n/len(partial)+1)[:n] +} + +func TestEmpty(t *testing.T) { + buf := new(bytes.Buffer) + r := NewReader(buf, dropper{t}, true, true) + if _, err := r.Next(); err != io.EOF { + t.Fatalf("got %v, want %v", err, io.EOF) + } +} + +func testGenerator(t *testing.T, reset func(), gen func() (string, bool)) { + buf := new(bytes.Buffer) + + reset() + w := NewWriter(buf) + for { + s, ok := gen() + if !ok { + break + } + ww, err := w.Next() + if err != nil { + t.Fatal(err) + } + if _, err := ww.Write([]byte(s)); err != nil { + t.Fatal(err) + } + } + if err := w.Close(); err != nil { + t.Fatal(err) + } + + reset() + r := NewReader(buf, dropper{t}, true, true) + for { + s, ok := gen() + if !ok { + break + } + rr, err := r.Next() + if err != nil { + t.Fatal(err) + } + x, err := ioutil.ReadAll(rr) + if err != nil { + t.Fatal(err) + } + if string(x) != s { + t.Fatalf("got %q, want %q", short(string(x)), short(s)) + } + } + if _, err := r.Next(); err != io.EOF { + t.Fatalf("got %v, want %v", err, io.EOF) + } +} + +func testLiterals(t *testing.T, s []string) { + var i int + reset := func() { + i = 0 + } + gen := func() (string, bool) { + if i == len(s) { + return "", false + } + i++ + return s[i-1], true + } + testGenerator(t, reset, gen) +} + +func TestMany(t *testing.T) { + const n = 1e5 + var i int + reset := func() { + i = 0 + } + gen := func() (string, bool) { + if i == n { + return "", false + } + i++ + return fmt.Sprintf("%d.", i-1), true + } + testGenerator(t, reset, gen) +} + +func TestRandom(t *testing.T) { + const n = 1e2 + var ( + i int + r *rand.Rand + ) + reset := func() { + i, r = 0, rand.New(rand.NewSource(0)) + } + gen := func() (string, bool) { + if i == n { + return "", false + } + i++ + return strings.Repeat(string(uint8(i)), r.Intn(2*blockSize+16)), true + } + testGenerator(t, reset, gen) +} + +func TestBasic(t *testing.T) { + testLiterals(t, []string{ + strings.Repeat("a", 1000), + strings.Repeat("b", 97270), + strings.Repeat("c", 8000), + }) +} + +func TestBoundary(t *testing.T) { + for i := blockSize - 16; i < blockSize+16; i++ { + s0 := big("abcd", i) + for j := blockSize - 16; j < blockSize+16; j++ { + s1 := big("ABCDE", j) + testLiterals(t, []string{s0, s1}) + testLiterals(t, []string{s0, "", s1}) + testLiterals(t, []string{s0, "x", s1}) + } + } +} + +func TestFlush(t *testing.T) { + buf := new(bytes.Buffer) + w := NewWriter(buf) + // Write a couple of records. Everything should still be held + // in the record.Writer buffer, so that buf.Len should be 0. + w0, _ := w.Next() + w0.Write([]byte("0")) + w1, _ := w.Next() + w1.Write([]byte("11")) + if got, want := buf.Len(), 0; got != want { + t.Fatalf("buffer length #0: got %d want %d", got, want) + } + // Flush the record.Writer buffer, which should yield 17 bytes. + // 17 = 2*7 + 1 + 2, which is two headers and 1 + 2 payload bytes. + if err := w.Flush(); err != nil { + t.Fatal(err) + } + if got, want := buf.Len(), 17; got != want { + t.Fatalf("buffer length #1: got %d want %d", got, want) + } + // Do another write, one that isn't large enough to complete the block. + // The write should not have flowed through to buf. + w2, _ := w.Next() + w2.Write(bytes.Repeat([]byte("2"), 10000)) + if got, want := buf.Len(), 17; got != want { + t.Fatalf("buffer length #2: got %d want %d", got, want) + } + // Flushing should get us up to 10024 bytes written. + // 10024 = 17 + 7 + 10000. + if err := w.Flush(); err != nil { + t.Fatal(err) + } + if got, want := buf.Len(), 10024; got != want { + t.Fatalf("buffer length #3: got %d want %d", got, want) + } + // Do a bigger write, one that completes the current block. + // We should now have 32768 bytes (a complete block), without + // an explicit flush. + w3, _ := w.Next() + w3.Write(bytes.Repeat([]byte("3"), 40000)) + if got, want := buf.Len(), 32768; got != want { + t.Fatalf("buffer length #4: got %d want %d", got, want) + } + // Flushing should get us up to 50038 bytes written. + // 50038 = 10024 + 2*7 + 40000. There are two headers because + // the one record was split into two chunks. + if err := w.Flush(); err != nil { + t.Fatal(err) + } + if got, want := buf.Len(), 50038; got != want { + t.Fatalf("buffer length #5: got %d want %d", got, want) + } + // Check that reading those records give the right lengths. + r := NewReader(buf, dropper{t}, true, true) + wants := []int64{1, 2, 10000, 40000} + for i, want := range wants { + rr, _ := r.Next() + n, err := io.Copy(ioutil.Discard, rr) + if err != nil { + t.Fatalf("read #%d: %v", i, err) + } + if n != want { + t.Fatalf("read #%d: got %d bytes want %d", i, n, want) + } + } +} + +func TestNonExhaustiveRead(t *testing.T) { + const n = 100 + buf := new(bytes.Buffer) + p := make([]byte, 10) + rnd := rand.New(rand.NewSource(1)) + + w := NewWriter(buf) + for i := 0; i < n; i++ { + length := len(p) + rnd.Intn(3*blockSize) + s := string(uint8(i)) + "123456789abcdefgh" + ww, _ := w.Next() + ww.Write([]byte(big(s, length))) + } + if err := w.Close(); err != nil { + t.Fatal(err) + } + + r := NewReader(buf, dropper{t}, true, true) + for i := 0; i < n; i++ { + rr, _ := r.Next() + _, err := io.ReadFull(rr, p) + if err != nil { + t.Fatal(err) + } + want := string(uint8(i)) + "123456789" + if got := string(p); got != want { + t.Fatalf("read #%d: got %q want %q", i, got, want) + } + } +} + +func TestStaleReader(t *testing.T) { + buf := new(bytes.Buffer) + + w := NewWriter(buf) + w0, err := w.Next() + if err != nil { + t.Fatal(err) + } + w0.Write([]byte("0")) + w1, err := w.Next() + if err != nil { + t.Fatal(err) + } + w1.Write([]byte("11")) + if err := w.Close(); err != nil { + t.Fatal(err) + } + + r := NewReader(buf, dropper{t}, true, true) + r0, err := r.Next() + if err != nil { + t.Fatal(err) + } + r1, err := r.Next() + if err != nil { + t.Fatal(err) + } + p := make([]byte, 1) + if _, err := r0.Read(p); err == nil || !strings.Contains(err.Error(), "stale") { + t.Fatalf("stale read #0: unexpected error: %v", err) + } + if _, err := r1.Read(p); err != nil { + t.Fatalf("fresh read #1: got %v want nil error", err) + } + if p[0] != '1' { + t.Fatalf("fresh read #1: byte contents: got '%c' want '1'", p[0]) + } +} + +func TestStaleWriter(t *testing.T) { + buf := new(bytes.Buffer) + + w := NewWriter(buf) + w0, err := w.Next() + if err != nil { + t.Fatal(err) + } + w1, err := w.Next() + if err != nil { + t.Fatal(err) + } + if _, err := w0.Write([]byte("0")); err == nil || !strings.Contains(err.Error(), "stale") { + t.Fatalf("stale write #0: unexpected error: %v", err) + } + if _, err := w1.Write([]byte("11")); err != nil { + t.Fatalf("fresh write #1: got %v want nil error", err) + } + if err := w.Flush(); err != nil { + t.Fatalf("flush: %v", err) + } + if _, err := w1.Write([]byte("0")); err == nil || !strings.Contains(err.Error(), "stale") { + t.Fatalf("stale write #1: unexpected error: %v", err) + } +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key.go new file mode 100644 index 000000000..b9acf932d --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key.go @@ -0,0 +1,139 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "encoding/binary" + "fmt" +) + +type vType int + +func (t vType) String() string { + switch t { + case tDel: + return "d" + case tVal: + return "v" + } + return "x" +} + +// Value types encoded as the last component of internal keys. +// Don't modify; this value are saved to disk. +const ( + tDel vType = iota + tVal +) + +// tSeek defines the vType that should be passed when constructing an +// internal key for seeking to a particular sequence number (since we +// sort sequence numbers in decreasing order and the value type is +// embedded as the low 8 bits in the sequence number in internal keys, +// we need to use the highest-numbered ValueType, not the lowest). +const tSeek = tVal + +const ( + // Maximum value possible for sequence number; the 8-bits are + // used by value type, so its can packed together in single + // 64-bit integer. + kMaxSeq uint64 = (uint64(1) << 56) - 1 + // Maximum value possible for packed sequence number and type. + kMaxNum uint64 = (kMaxSeq << 8) | uint64(tSeek) +) + +// Maximum number encoded in bytes. +var kMaxNumBytes = make([]byte, 8) + +func init() { + binary.LittleEndian.PutUint64(kMaxNumBytes, kMaxNum) +} + +type iKey []byte + +func newIKey(ukey []byte, seq uint64, t vType) iKey { + if seq > kMaxSeq || t > tVal { + panic("invalid seq number or value type") + } + + b := make(iKey, len(ukey)+8) + copy(b, ukey) + binary.LittleEndian.PutUint64(b[len(ukey):], (seq<<8)|uint64(t)) + return b +} + +func parseIkey(p []byte) (ukey []byte, seq uint64, t vType, ok bool) { + if len(p) < 8 { + return + } + num := binary.LittleEndian.Uint64(p[len(p)-8:]) + seq, t = uint64(num>>8), vType(num&0xff) + if t > tVal { + return + } + ukey = p[:len(p)-8] + ok = true + return +} + +func validIkey(p []byte) bool { + _, _, _, ok := parseIkey(p) + return ok +} + +func (p iKey) assert() { + if p == nil { + panic("nil iKey") + } + if len(p) < 8 { + panic(fmt.Sprintf("invalid iKey %q, len=%d", []byte(p), len(p))) + } +} + +func (p iKey) ok() bool { + if len(p) < 8 { + return false + } + _, _, ok := p.parseNum() + return ok +} + +func (p iKey) ukey() []byte { + p.assert() + return p[:len(p)-8] +} + +func (p iKey) num() uint64 { + p.assert() + return binary.LittleEndian.Uint64(p[len(p)-8:]) +} + +func (p iKey) parseNum() (seq uint64, t vType, ok bool) { + if p == nil { + panic("nil iKey") + } + if len(p) < 8 { + return + } + num := p.num() + seq, t = uint64(num>>8), vType(num&0xff) + if t > tVal { + return 0, 0, false + } + ok = true + return +} + +func (p iKey) String() string { + if len(p) == 0 { + return "<nil>" + } + if seq, t, ok := p.parseNum(); ok { + return fmt.Sprintf("%s,%s%d", shorten(string(p.ukey())), t, seq) + } + return "<invalid>" +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key_test.go new file mode 100644 index 000000000..e307cfc1d --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key_test.go @@ -0,0 +1,123 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "bytes" + "testing" + + "github.com/syndtr/goleveldb/leveldb/comparer" +) + +var defaultIComparer = &iComparer{comparer.DefaultComparer} + +func ikey(key string, seq uint64, t vType) iKey { + return newIKey([]byte(key), uint64(seq), t) +} + +func shortSep(a, b []byte) []byte { + dst := make([]byte, len(a)) + dst = defaultIComparer.Separator(dst[:0], a, b) + if dst == nil { + return a + } + return dst +} + +func shortSuccessor(b []byte) []byte { + dst := make([]byte, len(b)) + dst = defaultIComparer.Successor(dst[:0], b) + if dst == nil { + return b + } + return dst +} + +func testSingleKey(t *testing.T, key string, seq uint64, vt vType) { + ik := ikey(key, seq, vt) + + if !bytes.Equal(ik.ukey(), []byte(key)) { + t.Errorf("user key does not equal, got %v, want %v", string(ik.ukey()), key) + } + + if rseq, rt, ok := ik.parseNum(); ok { + if rseq != seq { + t.Errorf("seq number does not equal, got %v, want %v", rseq, seq) + } + + if rt != vt { + t.Errorf("type does not equal, got %v, want %v", rt, vt) + } + } else { + t.Error("cannot parse seq and type") + } +} + +func TestIKey_EncodeDecode(t *testing.T) { + keys := []string{"", "k", "hello", "longggggggggggggggggggggg"} + seqs := []uint64{ + 1, 2, 3, + (1 << 8) - 1, 1 << 8, (1 << 8) + 1, + (1 << 16) - 1, 1 << 16, (1 << 16) + 1, + (1 << 32) - 1, 1 << 32, (1 << 32) + 1, + } + for _, key := range keys { + for _, seq := range seqs { + testSingleKey(t, key, seq, tVal) + testSingleKey(t, "hello", 1, tDel) + } + } +} + +func assertBytes(t *testing.T, want, got []byte) { + if !bytes.Equal(got, want) { + t.Errorf("assert failed, got %v, want %v", got, want) + } +} + +func TestIKeyShortSeparator(t *testing.T) { + // When user keys are same + assertBytes(t, ikey("foo", 100, tVal), + shortSep(ikey("foo", 100, tVal), + ikey("foo", 99, tVal))) + assertBytes(t, ikey("foo", 100, tVal), + shortSep(ikey("foo", 100, tVal), + ikey("foo", 101, tVal))) + assertBytes(t, ikey("foo", 100, tVal), + shortSep(ikey("foo", 100, tVal), + ikey("foo", 100, tVal))) + assertBytes(t, ikey("foo", 100, tVal), + shortSep(ikey("foo", 100, tVal), + ikey("foo", 100, tDel))) + + // When user keys are misordered + assertBytes(t, ikey("foo", 100, tVal), + shortSep(ikey("foo", 100, tVal), + ikey("bar", 99, tVal))) + + // When user keys are different, but correctly ordered + assertBytes(t, ikey("g", uint64(kMaxSeq), tSeek), + shortSep(ikey("foo", 100, tVal), + ikey("hello", 200, tVal))) + + // When start user key is prefix of limit user key + assertBytes(t, ikey("foo", 100, tVal), + shortSep(ikey("foo", 100, tVal), + ikey("foobar", 200, tVal))) + + // When limit user key is prefix of start user key + assertBytes(t, ikey("foobar", 100, tVal), + shortSep(ikey("foobar", 100, tVal), + ikey("foo", 200, tVal))) +} + +func TestIKeyShortestSuccessor(t *testing.T) { + assertBytes(t, ikey("g", uint64(kMaxSeq), tSeek), + shortSuccessor(ikey("foo", 100, tVal))) + assertBytes(t, ikey("\xff\xff", 100, tVal), + shortSuccessor(ikey("\xff\xff", 100, tVal))) +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/leveldb_suite_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/leveldb_suite_test.go new file mode 100644 index 000000000..245b1fd4d --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/leveldb_suite_test.go @@ -0,0 +1,20 @@ +package leveldb + +import ( + "testing" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + + "github.com/syndtr/goleveldb/leveldb/testutil" +) + +func TestLeveldb(t *testing.T) { + testutil.RunDefer() + + RegisterFailHandler(Fail) + RunSpecs(t, "Leveldb Suite") + + RegisterTestingT(t) + testutil.RunDefer("teardown") +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/bench_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/bench_test.go new file mode 100644 index 000000000..b05084caa --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/bench_test.go @@ -0,0 +1,75 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package memdb + +import ( + "encoding/binary" + "math/rand" + "testing" + + "github.com/syndtr/goleveldb/leveldb/comparer" +) + +func BenchmarkPut(b *testing.B) { + buf := make([][4]byte, b.N) + for i := range buf { + binary.LittleEndian.PutUint32(buf[i][:], uint32(i)) + } + + b.ResetTimer() + p := New(comparer.DefaultComparer, 0) + for i := range buf { + p.Put(buf[i][:], nil) + } +} + +func BenchmarkPutRandom(b *testing.B) { + buf := make([][4]byte, b.N) + for i := range buf { + binary.LittleEndian.PutUint32(buf[i][:], uint32(rand.Int())) + } + + b.ResetTimer() + p := New(comparer.DefaultComparer, 0) + for i := range buf { + p.Put(buf[i][:], nil) + } +} + +func BenchmarkGet(b *testing.B) { + buf := make([][4]byte, b.N) + for i := range buf { + binary.LittleEndian.PutUint32(buf[i][:], uint32(i)) + } + + p := New(comparer.DefaultComparer, 0) + for i := range buf { + p.Put(buf[i][:], nil) + } + + b.ResetTimer() + for i := range buf { + p.Get(buf[i][:]) + } +} + +func BenchmarkGetRandom(b *testing.B) { + buf := make([][4]byte, b.N) + for i := range buf { + binary.LittleEndian.PutUint32(buf[i][:], uint32(i)) + } + + p := New(comparer.DefaultComparer, 0) + for i := range buf { + p.Put(buf[i][:], nil) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + p.Get(buf[rand.Int()%b.N][:]) + } +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go new file mode 100644 index 000000000..7bcae992a --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go @@ -0,0 +1,450 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package memdb provides in-memory key/value database implementation. +package memdb + +import ( + "math/rand" + "sync" + + "github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/util" +) + +var ( + ErrNotFound = util.ErrNotFound +) + +const tMaxHeight = 12 + +type dbIter struct { + util.BasicReleaser + p *DB + slice *util.Range + node int + forward bool + key, value []byte +} + +func (i *dbIter) fill(checkStart, checkLimit bool) bool { + if i.node != 0 { + n := i.p.nodeData[i.node] + m := n + i.p.nodeData[i.node+nKey] + i.key = i.p.kvData[n:m] + if i.slice != nil { + switch { + case checkLimit && i.slice.Limit != nil && i.p.cmp.Compare(i.key, i.slice.Limit) >= 0: + fallthrough + case checkStart && i.slice.Start != nil && i.p.cmp.Compare(i.key, i.slice.Start) < 0: + i.node = 0 + goto bail + } + } + i.value = i.p.kvData[m : m+i.p.nodeData[i.node+nVal]] + return true + } +bail: + i.key = nil + i.value = nil + return false +} + +func (i *dbIter) Valid() bool { + return i.node != 0 +} + +func (i *dbIter) First() bool { + i.forward = true + i.p.mu.RLock() + defer i.p.mu.RUnlock() + if i.slice != nil && i.slice.Start != nil { + i.node, _ = i.p.findGE(i.slice.Start, false) + } else { + i.node = i.p.nodeData[nNext] + } + return i.fill(false, true) +} + +func (i *dbIter) Last() bool { + if i.p == nil { + return false + } + i.forward = false + i.p.mu.RLock() + defer i.p.mu.RUnlock() + if i.slice != nil && i.slice.Limit != nil { + i.node = i.p.findLT(i.slice.Limit) + } else { + i.node = i.p.findLast() + } + return i.fill(true, false) +} + +func (i *dbIter) Seek(key []byte) bool { + if i.p == nil { + return false + } + i.forward = true + i.p.mu.RLock() + defer i.p.mu.RUnlock() + if i.slice != nil && i.slice.Start != nil && i.p.cmp.Compare(key, i.slice.Start) < 0 { + key = i.slice.Start + } + i.node, _ = i.p.findGE(key, false) + return i.fill(false, true) +} + +func (i *dbIter) Next() bool { + if i.p == nil { + return false + } + if i.node == 0 { + if !i.forward { + return i.First() + } + return false + } + i.forward = true + i.p.mu.RLock() + defer i.p.mu.RUnlock() + i.node = i.p.nodeData[i.node+nNext] + return i.fill(false, true) +} + +func (i *dbIter) Prev() bool { + if i.p == nil { + return false + } + if i.node == 0 { + if i.forward { + return i.Last() + } + return false + } + i.forward = false + i.p.mu.RLock() + defer i.p.mu.RUnlock() + i.node = i.p.findLT(i.key) + return i.fill(true, false) +} + +func (i *dbIter) Key() []byte { + return i.key +} + +func (i *dbIter) Value() []byte { + return i.value +} + +func (i *dbIter) Error() error { return nil } + +func (i *dbIter) Release() { + if i.p != nil { + i.p = nil + i.node = 0 + i.key = nil + i.value = nil + i.BasicReleaser.Release() + } +} + +const ( + nKV = iota + nKey + nVal + nHeight + nNext +) + +// DB is an in-memory key/value database. +type DB struct { + cmp comparer.BasicComparer + rnd *rand.Rand + + mu sync.RWMutex + kvData []byte + // Node data: + // [0] : KV offset + // [1] : Key length + // [2] : Value length + // [3] : Height + // [3..height] : Next nodes + nodeData []int + prevNode [tMaxHeight]int + maxHeight int + n int + kvSize int +} + +func (p *DB) randHeight() (h int) { + const branching = 4 + h = 1 + for h < tMaxHeight && p.rnd.Int()%branching == 0 { + h++ + } + return +} + +func (p *DB) findGE(key []byte, prev bool) (int, bool) { + node := 0 + h := p.maxHeight - 1 + for { + next := p.nodeData[node+nNext+h] + cmp := 1 + if next != 0 { + o := p.nodeData[next] + cmp = p.cmp.Compare(p.kvData[o:o+p.nodeData[next+nKey]], key) + } + if cmp < 0 { + // Keep searching in this list + node = next + } else { + if prev { + p.prevNode[h] = node + } else if cmp == 0 { + return next, true + } + if h == 0 { + return next, cmp == 0 + } + h-- + } + } +} + +func (p *DB) findLT(key []byte) int { + node := 0 + h := p.maxHeight - 1 + for { + next := p.nodeData[node+nNext+h] + o := p.nodeData[next] + if next == 0 || p.cmp.Compare(p.kvData[o:o+p.nodeData[next+nKey]], key) >= 0 { + if h == 0 { + break + } + h-- + } else { + node = next + } + } + return node +} + +func (p *DB) findLast() int { + node := 0 + h := p.maxHeight - 1 + for { + next := p.nodeData[node+nNext+h] + if next == 0 { + if h == 0 { + break + } + h-- + } else { + node = next + } + } + return node +} + +// Put sets the value for the given key. It overwrites any previous value +// for that key; a DB is not a multi-map. +// +// It is safe to modify the contents of the arguments after Put returns. +func (p *DB) Put(key []byte, value []byte) error { + p.mu.Lock() + defer p.mu.Unlock() + + if node, exact := p.findGE(key, true); exact { + kvOffset := len(p.kvData) + p.kvData = append(p.kvData, key...) + p.kvData = append(p.kvData, value...) + p.nodeData[node] = kvOffset + m := p.nodeData[node+nVal] + p.nodeData[node+nVal] = len(value) + p.kvSize += len(value) - m + return nil + } + + h := p.randHeight() + if h > p.maxHeight { + for i := p.maxHeight; i < h; i++ { + p.prevNode[i] = 0 + } + p.maxHeight = h + } + + kvOffset := len(p.kvData) + p.kvData = append(p.kvData, key...) + p.kvData = append(p.kvData, value...) + // Node + node := len(p.nodeData) + p.nodeData = append(p.nodeData, kvOffset, len(key), len(value), h) + for i, n := range p.prevNode[:h] { + m := n + 4 + i + p.nodeData = append(p.nodeData, p.nodeData[m]) + p.nodeData[m] = node + } + + p.kvSize += len(key) + len(value) + p.n++ + return nil +} + +// Delete deletes the value for the given key. It returns ErrNotFound if +// the DB does not contain the key. +// +// It is safe to modify the contents of the arguments after Delete returns. +func (p *DB) Delete(key []byte) error { + p.mu.Lock() + defer p.mu.Unlock() + + node, exact := p.findGE(key, true) + if !exact { + return ErrNotFound + } + + h := p.nodeData[node+nHeight] + for i, n := range p.prevNode[:h] { + m := n + 4 + i + p.nodeData[m] = p.nodeData[p.nodeData[m]+nNext+i] + } + + p.kvSize -= p.nodeData[node+nKey] + p.nodeData[node+nVal] + p.n-- + return nil +} + +// Contains returns true if the given key are in the DB. +// +// It is safe to modify the contents of the arguments after Contains returns. +func (p *DB) Contains(key []byte) bool { + p.mu.RLock() + _, exact := p.findGE(key, false) + p.mu.RUnlock() + return exact +} + +// Get gets the value for the given key. It returns error.ErrNotFound if the +// DB does not contain the key. +// +// The caller should not modify the contents of the returned slice, but +// it is safe to modify the contents of the argument after Get returns. +func (p *DB) Get(key []byte) (value []byte, err error) { + p.mu.RLock() + if node, exact := p.findGE(key, false); exact { + o := p.nodeData[node] + p.nodeData[node+nKey] + value = p.kvData[o : o+p.nodeData[node+nVal]] + } else { + err = ErrNotFound + } + p.mu.RUnlock() + return +} + +// Find finds key/value pair whose key is greater than or equal to the +// given key. It returns ErrNotFound if the table doesn't contain +// such pair. +// +// The caller should not modify the contents of the returned slice, but +// it is safe to modify the contents of the argument after Find returns. +func (p *DB) Find(key []byte) (rkey, value []byte, err error) { + p.mu.RLock() + if node, _ := p.findGE(key, false); node != 0 { + n := p.nodeData[node] + m := n + p.nodeData[node+nKey] + rkey = p.kvData[n:m] + value = p.kvData[m : m+p.nodeData[node+nVal]] + } else { + err = ErrNotFound + } + p.mu.RUnlock() + return +} + +// NewIterator returns an iterator of the DB. +// The returned iterator is not goroutine-safe, but it is safe to use +// multiple iterators concurrently, with each in a dedicated goroutine. +// It is also safe to use an iterator concurrently with modifying its +// underlying DB. However, the resultant key/value pairs are not guaranteed +// to be a consistent snapshot of the DB at a particular point in time. +// +// Slice allows slicing the iterator to only contains keys in the given +// range. A nil Range.Start is treated as a key before all keys in the +// DB. And a nil Range.Limit is treated as a key after all keys in +// the DB. +// +// The iterator must be released after use, by calling Release method. +// +// Also read Iterator documentation of the leveldb/iterator package. +func (p *DB) NewIterator(slice *util.Range) iterator.Iterator { + return &dbIter{p: p, slice: slice} +} + +// Capacity returns keys/values buffer capacity. +func (p *DB) Capacity() int { + p.mu.RLock() + defer p.mu.RUnlock() + return cap(p.kvData) +} + +// Size returns sum of keys and values length. Note that deleted +// key/value will not be accouted for, but it will still consume +// the buffer, since the buffer is append only. +func (p *DB) Size() int { + p.mu.RLock() + defer p.mu.RUnlock() + return p.kvSize +} + +// Free returns keys/values free buffer before need to grow. +func (p *DB) Free() int { + p.mu.RLock() + defer p.mu.RUnlock() + return cap(p.kvData) - len(p.kvData) +} + +// Len returns the number of entries in the DB. +func (p *DB) Len() int { + p.mu.RLock() + defer p.mu.RUnlock() + return p.n +} + +// Reset resets the DB to initial empty state. Allows reuse the buffer. +func (p *DB) Reset() { + p.rnd = rand.New(rand.NewSource(0xdeadbeef)) + p.maxHeight = 1 + p.n = 0 + p.kvSize = 0 + p.kvData = p.kvData[:0] + p.nodeData = p.nodeData[:4+tMaxHeight] + p.nodeData[nKV] = 0 + p.nodeData[nKey] = 0 + p.nodeData[nVal] = 0 + p.nodeData[nHeight] = tMaxHeight + for n := 0; n < tMaxHeight; n++ { + p.nodeData[4+n] = 0 + p.prevNode[n] = 0 + } +} + +// New creates a new initalized in-memory key/value DB. The capacity +// is the initial key/value buffer capacity. The capacity is advisory, +// not enforced. +func New(cmp comparer.BasicComparer, capacity int) *DB { + p := &DB{ + cmp: cmp, + rnd: rand.New(rand.NewSource(0xdeadbeef)), + maxHeight: 1, + kvData: make([]byte, 0, capacity), + nodeData: make([]int, 4+tMaxHeight), + } + p.nodeData[nHeight] = tMaxHeight + return p +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_suite_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_suite_test.go new file mode 100644 index 000000000..788539a87 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_suite_test.go @@ -0,0 +1,17 @@ +package memdb + +import ( + "testing" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + + "github.com/syndtr/goleveldb/leveldb/testutil" +) + +func TestMemdb(t *testing.T) { + testutil.RunDefer() + + RegisterFailHandler(Fail) + RunSpecs(t, "Memdb Suite") +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_test.go new file mode 100644 index 000000000..f96a9d1ea --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/memdb/memdb_test.go @@ -0,0 +1,135 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package memdb + +import ( + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + + "github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/testutil" + "github.com/syndtr/goleveldb/leveldb/util" +) + +func (p *DB) TestFindLT(key []byte) (rkey, value []byte, err error) { + p.mu.RLock() + if node := p.findLT(key); node != 0 { + n := p.nodeData[node] + m := n + p.nodeData[node+nKey] + rkey = p.kvData[n:m] + value = p.kvData[m : m+p.nodeData[node+nVal]] + } else { + err = ErrNotFound + } + p.mu.RUnlock() + return +} + +func (p *DB) TestFindLast() (rkey, value []byte, err error) { + p.mu.RLock() + if node := p.findLast(); node != 0 { + n := p.nodeData[node] + m := n + p.nodeData[node+nKey] + rkey = p.kvData[n:m] + value = p.kvData[m : m+p.nodeData[node+nVal]] + } else { + err = ErrNotFound + } + p.mu.RUnlock() + return +} + +func (p *DB) TestPut(key []byte, value []byte) error { + p.Put(key, value) + return nil +} + +func (p *DB) TestDelete(key []byte) error { + p.Delete(key) + return nil +} + +func (p *DB) TestFind(key []byte) (rkey, rvalue []byte, err error) { + return p.Find(key) +} + +func (p *DB) TestGet(key []byte) (value []byte, err error) { + return p.Get(key) +} + +func (p *DB) TestNewIterator(slice *util.Range) iterator.Iterator { + return p.NewIterator(slice) +} + +var _ = testutil.Defer(func() { + Describe("Memdb", func() { + Describe("write test", func() { + It("should do write correctly", func() { + db := New(comparer.DefaultComparer, 0) + t := testutil.DBTesting{ + DB: db, + Deleted: testutil.KeyValue_Generate(nil, 1000, 1, 30, 5, 5).Clone(), + PostFn: func(t *testutil.DBTesting) { + Expect(db.Len()).Should(Equal(t.Present.Len())) + Expect(db.Size()).Should(Equal(t.Present.Size())) + switch t.Act { + case testutil.DBPut, testutil.DBOverwrite: + Expect(db.Contains(t.ActKey)).Should(BeTrue()) + default: + Expect(db.Contains(t.ActKey)).Should(BeFalse()) + } + }, + } + testutil.DoDBTesting(&t) + }) + }) + + Describe("read test", func() { + testutil.AllKeyValueTesting(nil, func(kv testutil.KeyValue) testutil.DB { + // Building the DB. + db := New(comparer.DefaultComparer, 0) + kv.IterateShuffled(nil, func(i int, key, value []byte) { + db.Put(key, value) + }) + + if kv.Len() > 1 { + It("Should find correct keys with findLT", func() { + testutil.ShuffledIndex(nil, kv.Len()-1, 1, func(i int) { + key_, key, _ := kv.IndexInexact(i + 1) + expectedKey, expectedValue := kv.Index(i) + + // Using key that exist. + rkey, rvalue, err := db.TestFindLT(key) + Expect(err).ShouldNot(HaveOccurred(), "Error for key %q -> %q", key, expectedKey) + Expect(rkey).Should(Equal(expectedKey), "Key") + Expect(rvalue).Should(Equal(expectedValue), "Value for key %q -> %q", key, expectedKey) + + // Using key that doesn't exist. + rkey, rvalue, err = db.TestFindLT(key_) + Expect(err).ShouldNot(HaveOccurred(), "Error for key %q (%q) -> %q", key_, key, expectedKey) + Expect(rkey).Should(Equal(expectedKey)) + Expect(rvalue).Should(Equal(expectedValue), "Value for key %q (%q) -> %q", key_, key, expectedKey) + }) + }) + } + + if kv.Len() > 0 { + It("Should find last key with findLast", func() { + key, value := kv.Index(kv.Len() - 1) + rkey, rvalue, err := db.TestFindLast() + Expect(err).ShouldNot(HaveOccurred()) + Expect(rkey).Should(Equal(key)) + Expect(rvalue).Should(Equal(value)) + }) + } + + return db + }) + }) + }) +}) diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt/options.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt/options.go new file mode 100644 index 000000000..b940ce427 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt/options.go @@ -0,0 +1,318 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package opt provides sets of options used by LevelDB. +package opt + +import ( + "github.com/syndtr/goleveldb/leveldb/cache" + "github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/syndtr/goleveldb/leveldb/filter" +) + +const ( + KiB = 1024 + MiB = KiB * 1024 + GiB = MiB * 1024 +) + +const ( + DefaultBlockCacheSize = 8 * MiB + DefaultBlockRestartInterval = 16 + DefaultBlockSize = 4 * KiB + DefaultCompressionType = SnappyCompression + DefaultMaxOpenFiles = 1000 + DefaultWriteBuffer = 4 * MiB +) + +type noCache struct{} + +func (noCache) SetCapacity(capacity int) {} +func (noCache) GetNamespace(id uint64) cache.Namespace { return nil } +func (noCache) Purge(fin cache.PurgeFin) {} +func (noCache) Zap(closed bool) {} + +var NoCache cache.Cache = noCache{} + +// Compression is the per-block compression algorithm to use. +type Compression uint + +func (c Compression) String() string { + switch c { + case DefaultCompression: + return "default" + case NoCompression: + return "none" + case SnappyCompression: + return "snappy" + } + return "invalid" +} + +const ( + DefaultCompression Compression = iota + NoCompression + SnappyCompression + nCompression +) + +// Strict is the DB strict level. +type Strict uint + +const ( + // If present then a corrupted or invalid chunk or block in manifest + // journal will cause an error istead of being dropped. + StrictManifest Strict = 1 << iota + + // If present then a corrupted or invalid chunk or block in journal + // will cause an error istead of being dropped. + StrictJournal + + // If present then journal chunk checksum will be verified. + StrictJournalChecksum + + // If present then an invalid key/value pair will cause an error + // instead of being skipped. + StrictIterator + + // If present then 'sorted table' block checksum will be verified. + StrictBlockChecksum + + // StrictAll enables all strict flags. + StrictAll = StrictManifest | StrictJournal | StrictJournalChecksum | StrictIterator | StrictBlockChecksum + + // DefaultStrict is the default strict flags. Specify any strict flags + // will override default strict flags as whole (i.e. not OR'ed). + DefaultStrict = StrictJournalChecksum | StrictBlockChecksum + + // NoStrict disables all strict flags. Override default strict flags. + NoStrict = ^StrictAll +) + +// Options holds the optional parameters for the DB at large. +type Options struct { + // AltFilters defines one or more 'alternative filters'. + // 'alternative filters' will be used during reads if a filter block + // does not match with the 'effective filter'. + // + // The default value is nil + AltFilters []filter.Filter + + // BlockCache provides per-block caching for LevelDB. Specify NoCache to + // disable block caching. + // + // By default LevelDB will create LRU-cache with capacity of 8MiB. + BlockCache cache.Cache + + // BlockRestartInterval is the number of keys between restart points for + // delta encoding of keys. + // + // The default value is 16. + BlockRestartInterval int + + // BlockSize is the minimum uncompressed size in bytes of each 'sorted table' + // block. + // + // The default value is 4KiB. + BlockSize int + + // Comparer defines a total ordering over the space of []byte keys: a 'less + // than' relationship. The same comparison algorithm must be used for reads + // and writes over the lifetime of the DB. + // + // The default value uses the same ordering as bytes.Compare. + Comparer comparer.Comparer + + // Compression defines the per-block compression to use. + // + // The default value (DefaultCompression) uses snappy compression. + Compression Compression + + // ErrorIfExist defines whether an error should returned if the DB already + // exist. + // + // The default value is false. + ErrorIfExist bool + + // ErrorIfMissing defines whether an error should returned if the DB is + // missing. If false then the database will be created if missing, otherwise + // an error will be returned. + // + // The default value is false. + ErrorIfMissing bool + + // Filter defines an 'effective filter' to use. An 'effective filter' + // if defined will be used to generate per-table filter block. + // The filter name will be stored on disk. + // During reads LevelDB will try to find matching filter from + // 'effective filter' and 'alternative filters'. + // + // Filter can be changed after a DB has been created. It is recommended + // to put old filter to the 'alternative filters' to mitigate lack of + // filter during transition period. + // + // A filter is used to reduce disk reads when looking for a specific key. + // + // The default value is nil. + Filter filter.Filter + + // MaxOpenFiles defines maximum number of open files to kept around + // (cached). This is not an hard limit, actual open files may exceed + // the defined value. + // + // The default value is 1000. + MaxOpenFiles int + + // Strict defines the DB strict level. + Strict Strict + + // WriteBuffer defines maximum size of a 'memdb' before flushed to + // 'sorted table'. 'memdb' is an in-memory DB backed by an on-disk + // unsorted journal. + // + // LevelDB may held up to two 'memdb' at the same time. + // + // The default value is 4MiB. + WriteBuffer int +} + +func (o *Options) GetAltFilters() []filter.Filter { + if o == nil { + return nil + } + return o.AltFilters +} + +func (o *Options) GetBlockCache() cache.Cache { + if o == nil { + return nil + } + return o.BlockCache +} + +func (o *Options) GetBlockRestartInterval() int { + if o == nil || o.BlockRestartInterval <= 0 { + return DefaultBlockRestartInterval + } + return o.BlockRestartInterval +} + +func (o *Options) GetBlockSize() int { + if o == nil || o.BlockSize <= 0 { + return DefaultBlockSize + } + return o.BlockSize +} + +func (o *Options) GetComparer() comparer.Comparer { + if o == nil || o.Comparer == nil { + return comparer.DefaultComparer + } + return o.Comparer +} + +func (o *Options) GetCompression() Compression { + if o == nil || o.Compression <= DefaultCompression || o.Compression >= nCompression { + return DefaultCompressionType + } + return o.Compression +} + +func (o *Options) GetErrorIfExist() bool { + if o == nil { + return false + } + return o.ErrorIfExist +} + +func (o *Options) GetErrorIfMissing() bool { + if o == nil { + return false + } + return o.ErrorIfMissing +} + +func (o *Options) GetFilter() filter.Filter { + if o == nil { + return nil + } + return o.Filter +} + +func (o *Options) GetMaxOpenFiles() int { + if o == nil || o.MaxOpenFiles <= 0 { + return DefaultMaxOpenFiles + } + return o.MaxOpenFiles +} + +func (o *Options) GetStrict(strict Strict) bool { + if o == nil || o.Strict == 0 { + return DefaultStrict&strict != 0 + } + return o.Strict&strict != 0 +} + +func (o *Options) GetWriteBuffer() int { + if o == nil || o.WriteBuffer <= 0 { + return DefaultWriteBuffer + } + return o.WriteBuffer +} + +// ReadOptions holds the optional parameters for 'read operation'. The +// 'read operation' includes Get, Find and NewIterator. +type ReadOptions struct { + // DontFillCache defines whether block reads for this 'read operation' + // should be cached. If false then the block will be cached. This does + // not affects already cached block. + // + // The default value is false. + DontFillCache bool + + // Strict overrides global DB strict level. Only StrictIterator and + // StrictBlockChecksum that does have effects here. + Strict Strict +} + +func (ro *ReadOptions) GetDontFillCache() bool { + if ro == nil { + return false + } + return ro.DontFillCache +} + +func (ro *ReadOptions) GetStrict(strict Strict) bool { + if ro == nil { + return false + } + return ro.Strict&strict != 0 +} + +// WriteOptions holds the optional parameters for 'write operation'. The +// 'write operation' includes Write, Put and Delete. +type WriteOptions struct { + // Sync is whether to sync underlying writes from the OS buffer cache + // through to actual disk, if applicable. Setting Sync can result in + // slower writes. + // + // If false, and the machine crashes, then some recent writes may be lost. + // Note that if it is just the process that crashes (and the machine does + // not) then no writes will be lost. + // + // In other words, Sync being false has the same semantics as a write + // system call. Sync being true means write followed by fsync. + // + // The default value is false. + Sync bool +} + +func (wo *WriteOptions) GetSync() bool { + if wo == nil { + return false + } + return wo.Sync +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/options.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/options.go new file mode 100644 index 000000000..fc6a96965 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/options.go @@ -0,0 +1,41 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "github.com/syndtr/goleveldb/leveldb/cache" + "github.com/syndtr/goleveldb/leveldb/filter" + "github.com/syndtr/goleveldb/leveldb/opt" +) + +func (s *session) setOptions(o *opt.Options) { + s.o = &opt.Options{} + if o != nil { + *s.o = *o + } + // Alternative filters. + if filters := o.GetAltFilters(); len(filters) > 0 { + s.o.AltFilters = make([]filter.Filter, len(filters)) + for i, filter := range filters { + s.o.AltFilters[i] = &iFilter{filter} + } + } + // Block cache. + switch o.GetBlockCache() { + case nil: + s.o.BlockCache = cache.NewLRUCache(opt.DefaultBlockCacheSize) + case opt.NoCache: + s.o.BlockCache = nil + } + // Comparer. + s.icmp = &iComparer{o.GetComparer()} + s.o.Comparer = s.icmp + // Filter. + if filter := o.GetFilter(); filter != nil { + s.o.Filter = &iFilter{filter} + } +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session.go new file mode 100644 index 000000000..6b2a61683 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session.go @@ -0,0 +1,403 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "errors" + "io" + "os" + "sync" + "sync/atomic" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/journal" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" + "github.com/syndtr/goleveldb/leveldb/util" +) + +// session represent a persistent database session. +type session struct { + // Need 64-bit alignment. + stFileNum uint64 // current unused file number + stJournalNum uint64 // current journal file number; need external synchronization + stPrevJournalNum uint64 // prev journal file number; no longer used; for compatibility with older version of leveldb + stSeq uint64 // last mem compacted seq; need external synchronization + stTempFileNum uint64 + + stor storage.Storage + storLock util.Releaser + o *opt.Options + icmp *iComparer + tops *tOps + + manifest *journal.Writer + manifestWriter storage.Writer + manifestFile storage.File + + stCPtrs [kNumLevels]iKey // compact pointers; need external synchronization + stVersion *version // current version + vmu sync.Mutex +} + +func newSession(stor storage.Storage, o *opt.Options) (s *session, err error) { + if stor == nil { + return nil, os.ErrInvalid + } + storLock, err := stor.Lock() + if err != nil { + return + } + s = &session{ + stor: stor, + storLock: storLock, + } + s.setOptions(o) + s.tops = newTableOps(s, s.o.GetMaxOpenFiles()) + s.setVersion(&version{s: s}) + s.log("log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock D·DeletedEntry L·Level Q·SeqNum T·TimeElapsed") + return +} + +// Close session. +func (s *session) close() { + s.tops.close() + if bc := s.o.GetBlockCache(); bc != nil { + bc.Purge(nil) + } + if s.manifest != nil { + s.manifest.Close() + } + if s.manifestWriter != nil { + s.manifestWriter.Close() + } + s.manifest = nil + s.manifestWriter = nil + s.manifestFile = nil + s.stVersion = nil +} + +func (s *session) release() { + s.storLock.Release() +} + +// Create a new database session; need external synchronization. +func (s *session) create() error { + // create manifest + return s.newManifest(nil, nil) +} + +// Recover a database session; need external synchronization. +func (s *session) recover() (err error) { + defer func() { + if os.IsNotExist(err) { + // Don't return os.ErrNotExist if the underlying storage contains + // other files that belong to LevelDB. So the DB won't get trashed. + if files, _ := s.stor.GetFiles(storage.TypeAll); len(files) > 0 { + err = ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest file missing")} + } + } + }() + + file, err := s.stor.GetManifest() + if err != nil { + return + } + + reader, err := file.Open() + if err != nil { + return + } + defer reader.Close() + strict := s.o.GetStrict(opt.StrictManifest) + jr := journal.NewReader(reader, dropper{s, file}, strict, true) + + staging := s.version_NB().newStaging() + rec := &sessionRecord{} + for { + var r io.Reader + r, err = jr.Next() + if err != nil { + if err == io.EOF { + err = nil + break + } + return + } + + err = rec.decode(r) + if err == nil { + // save compact pointers + for _, rp := range rec.compactionPointers { + s.stCPtrs[rp.level] = iKey(rp.key) + } + // commit record to version staging + staging.commit(rec) + } else if strict { + return ErrCorrupted{Type: CorruptedManifest, Err: err} + } else { + s.logf("manifest error: %v (skipped)", err) + } + rec.resetCompactionPointers() + rec.resetAddedTables() + rec.resetDeletedTables() + } + + switch { + case !rec.has(recComparer): + return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest missing comparer name")} + case rec.comparer != s.icmp.uName(): + return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: comparer mismatch, " + "want '" + s.icmp.uName() + "', " + "got '" + rec.comparer + "'")} + case !rec.has(recNextNum): + return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest missing next file number")} + case !rec.has(recJournalNum): + return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest missing journal file number")} + case !rec.has(recSeq): + return ErrCorrupted{Type: CorruptedManifest, Err: errors.New("leveldb: manifest missing seq number")} + } + + s.manifestFile = file + s.setVersion(staging.finish()) + s.setFileNum(rec.nextNum) + s.recordCommited(rec) + return nil +} + +// Commit session; need external synchronization. +func (s *session) commit(r *sessionRecord) (err error) { + // spawn new version based on current version + nv := s.version_NB().spawn(r) + + if s.manifest == nil { + // manifest journal writer not yet created, create one + err = s.newManifest(r, nv) + } else { + err = s.flushManifest(r) + } + + // finally, apply new version if no error rise + if err == nil { + s.setVersion(nv) + } + + return +} + +// Pick a compaction based on current state; need external synchronization. +func (s *session) pickCompaction() *compaction { + v := s.version_NB() + + var level int + var t0 tFiles + if v.cScore >= 1 { + level = v.cLevel + cp := s.stCPtrs[level] + tt := v.tables[level] + for _, t := range tt { + if cp == nil || s.icmp.Compare(t.max, cp) > 0 { + t0 = append(t0, t) + break + } + } + if len(t0) == 0 { + t0 = append(t0, tt[0]) + } + } else { + if p := atomic.LoadPointer(&v.cSeek); p != nil { + ts := (*tSet)(p) + level = ts.level + t0 = append(t0, ts.table) + } else { + return nil + } + } + + c := &compaction{s: s, version: v, level: level} + if level == 0 { + min, max := t0.getRange(s.icmp) + t0 = nil + v.tables[0].getOverlaps(min.ukey(), max.ukey(), &t0, false, s.icmp.ucmp) + } + + c.tables[0] = t0 + c.expand() + return c +} + +// Create compaction from given level and range; need external synchronization. +func (s *session) getCompactionRange(level int, min, max []byte) *compaction { + v := s.version_NB() + + var t0 tFiles + v.tables[level].getOverlaps(min, max, &t0, level != 0, s.icmp.ucmp) + if len(t0) == 0 { + return nil + } + + // Avoid compacting too much in one shot in case the range is large. + // But we cannot do this for level-0 since level-0 files can overlap + // and we must not pick one file and drop another older file if the + // two files overlap. + if level > 0 { + limit := uint64(kMaxTableSize) + total := uint64(0) + for i, t := range t0 { + total += t.size + if total >= limit { + s.logf("table@compaction limiting F·%d -> F·%d", len(t0), i+1) + t0 = t0[:i+1] + break + } + } + } + + c := &compaction{s: s, version: v, level: level} + c.tables[0] = t0 + c.expand() + return c +} + +// compaction represent a compaction state +type compaction struct { + s *session + version *version + + level int + tables [2]tFiles + + gp tFiles + gpidx int + seenKey bool + overlappedBytes uint64 + min, max iKey + + tPtrs [kNumLevels]int +} + +// Expand compacted tables; need external synchronization. +func (c *compaction) expand() { + s := c.s + v := c.version + + level := c.level + vt0, vt1 := v.tables[level], v.tables[level+1] + + t0, t1 := c.tables[0], c.tables[1] + min, max := t0.getRange(s.icmp) + vt1.getOverlaps(min.ukey(), max.ukey(), &t1, true, s.icmp.ucmp) + + // Get entire range covered by compaction + amin, amax := append(t0, t1...).getRange(s.icmp) + + // See if we can grow the number of inputs in "level" without + // changing the number of "level+1" files we pick up. + if len(t1) > 0 { + var exp0 tFiles + vt0.getOverlaps(amin.ukey(), amax.ukey(), &exp0, level != 0, s.icmp.ucmp) + if len(exp0) > len(t0) && t1.size()+exp0.size() < kExpCompactionMaxBytes { + var exp1 tFiles + xmin, xmax := exp0.getRange(s.icmp) + vt1.getOverlaps(xmin.ukey(), xmax.ukey(), &exp1, true, s.icmp.ucmp) + if len(exp1) == len(t1) { + s.logf("table@compaction expanding L%d+L%d (F·%d S·%s)+(F·%d S·%s) -> (F·%d S·%s)+(F·%d S·%s)", + level, level+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())), + len(exp0), shortenb(int(exp0.size())), len(exp1), shortenb(int(exp1.size()))) + min, max = xmin, xmax + t0, t1 = exp0, exp1 + amin, amax = append(t0, t1...).getRange(s.icmp) + } + } + } + + // Compute the set of grandparent files that overlap this compaction + // (parent == level+1; grandparent == level+2) + if level+2 < kNumLevels { + v.tables[level+2].getOverlaps(amin.ukey(), amax.ukey(), &c.gp, true, s.icmp.ucmp) + } + + c.tables[0], c.tables[1] = t0, t1 + c.min, c.max = min, max +} + +// Check whether compaction is trivial. +func (c *compaction) trivial() bool { + return len(c.tables[0]) == 1 && len(c.tables[1]) == 0 && c.gp.size() <= kMaxGrandParentOverlapBytes +} + +func (c *compaction) isBaseLevelForKey(key []byte) bool { + s := c.s + v := c.version + + for level, tt := range v.tables[c.level+2:] { + for c.tPtrs[level] < len(tt) { + t := tt[c.tPtrs[level]] + if s.icmp.uCompare(key, t.max.ukey()) <= 0 { + // We've advanced far enough + if s.icmp.uCompare(key, t.min.ukey()) >= 0 { + // Key falls in this file's range, so definitely not base level + return false + } + break + } + c.tPtrs[level]++ + } + } + return true +} + +func (c *compaction) shouldStopBefore(key iKey) bool { + for ; c.gpidx < len(c.gp); c.gpidx++ { + gp := c.gp[c.gpidx] + if c.s.icmp.Compare(key, gp.max) <= 0 { + break + } + if c.seenKey { + c.overlappedBytes += gp.size + } + } + c.seenKey = true + + if c.overlappedBytes > kMaxGrandParentOverlapBytes { + // Too much overlap for current output; start new output + c.overlappedBytes = 0 + return true + } + return false +} + +func (c *compaction) newIterator() iterator.Iterator { + s := c.s + + level := c.level + icap := 2 + if c.level == 0 { + icap = len(c.tables[0]) + 1 + } + its := make([]iterator.Iterator, 0, icap) + + ro := &opt.ReadOptions{ + DontFillCache: true, + } + strict := s.o.GetStrict(opt.StrictIterator) + + for i, tt := range c.tables { + if len(tt) == 0 { + continue + } + + if level+i == 0 { + for _, t := range tt { + its = append(its, s.tops.newIterator(t, nil, ro)) + } + } else { + it := iterator.NewIndexedIterator(tt.newIndexIterator(s.tops, s.icmp, nil, ro), strict, true) + its = append(its, it) + } + } + + return iterator.NewMergedIterator(its, s.icmp, true) +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record.go new file mode 100644 index 000000000..c50fda737 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record.go @@ -0,0 +1,308 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "bufio" + "encoding/binary" + "errors" + "io" +) + +var errCorruptManifest = errors.New("leveldb: corrupt manifest") + +type byteReader interface { + io.Reader + io.ByteReader +} + +// These numbers are written to disk and should not be changed. +const ( + recComparer = 1 + recJournalNum = 2 + recNextNum = 3 + recSeq = 4 + recCompactionPointer = 5 + recDeletedTable = 6 + recNewTable = 7 + // 8 was used for large value refs + recPrevJournalNum = 9 +) + +type cpRecord struct { + level int + key iKey +} + +type ntRecord struct { + level int + num uint64 + size uint64 + min iKey + max iKey +} + +func (r ntRecord) makeFile(s *session) *tFile { + return newTFile(s.getTableFile(r.num), r.size, r.min, r.max) +} + +type dtRecord struct { + level int + num uint64 +} + +type sessionRecord struct { + hasRec int + comparer string + journalNum uint64 + prevJournalNum uint64 + nextNum uint64 + seq uint64 + compactionPointers []cpRecord + addedTables []ntRecord + deletedTables []dtRecord + scratch [binary.MaxVarintLen64]byte + err error +} + +func (p *sessionRecord) has(rec int) bool { + return p.hasRec&(1<<uint(rec)) != 0 +} + +func (p *sessionRecord) setComparer(name string) { + p.hasRec |= 1 << recComparer + p.comparer = name +} + +func (p *sessionRecord) setJournalNum(num uint64) { + p.hasRec |= 1 << recJournalNum + p.journalNum = num +} + +func (p *sessionRecord) setPrevJournalNum(num uint64) { + p.hasRec |= 1 << recPrevJournalNum + p.prevJournalNum = num +} + +func (p *sessionRecord) setNextNum(num uint64) { + p.hasRec |= 1 << recNextNum + p.nextNum = num +} + +func (p *sessionRecord) setSeq(seq uint64) { + p.hasRec |= 1 << recSeq + p.seq = seq +} + +func (p *sessionRecord) addCompactionPointer(level int, key iKey) { + p.hasRec |= 1 << recCompactionPointer + p.compactionPointers = append(p.compactionPointers, cpRecord{level, key}) +} + +func (p *sessionRecord) resetCompactionPointers() { + p.hasRec &= ^(1 << recCompactionPointer) + p.compactionPointers = p.compactionPointers[:0] +} + +func (p *sessionRecord) addTable(level int, num, size uint64, min, max iKey) { + p.hasRec |= 1 << recNewTable + p.addedTables = append(p.addedTables, ntRecord{level, num, size, min, max}) +} + +func (p *sessionRecord) addTableFile(level int, t *tFile) { + p.addTable(level, t.file.Num(), t.size, t.min, t.max) +} + +func (p *sessionRecord) resetAddedTables() { + p.hasRec &= ^(1 << recNewTable) + p.addedTables = p.addedTables[:0] +} + +func (p *sessionRecord) deleteTable(level int, num uint64) { + p.hasRec |= 1 << recDeletedTable + p.deletedTables = append(p.deletedTables, dtRecord{level, num}) +} + +func (p *sessionRecord) resetDeletedTables() { + p.hasRec &= ^(1 << recDeletedTable) + p.deletedTables = p.deletedTables[:0] +} + +func (p *sessionRecord) putUvarint(w io.Writer, x uint64) { + if p.err != nil { + return + } + n := binary.PutUvarint(p.scratch[:], x) + _, p.err = w.Write(p.scratch[:n]) +} + +func (p *sessionRecord) putBytes(w io.Writer, x []byte) { + if p.err != nil { + return + } + p.putUvarint(w, uint64(len(x))) + if p.err != nil { + return + } + _, p.err = w.Write(x) +} + +func (p *sessionRecord) encode(w io.Writer) error { + p.err = nil + if p.has(recComparer) { + p.putUvarint(w, recComparer) + p.putBytes(w, []byte(p.comparer)) + } + if p.has(recJournalNum) { + p.putUvarint(w, recJournalNum) + p.putUvarint(w, p.journalNum) + } + if p.has(recNextNum) { + p.putUvarint(w, recNextNum) + p.putUvarint(w, p.nextNum) + } + if p.has(recSeq) { + p.putUvarint(w, recSeq) + p.putUvarint(w, p.seq) + } + for _, cp := range p.compactionPointers { + p.putUvarint(w, recCompactionPointer) + p.putUvarint(w, uint64(cp.level)) + p.putBytes(w, cp.key) + } + for _, t := range p.deletedTables { + p.putUvarint(w, recDeletedTable) + p.putUvarint(w, uint64(t.level)) + p.putUvarint(w, t.num) + } + for _, t := range p.addedTables { + p.putUvarint(w, recNewTable) + p.putUvarint(w, uint64(t.level)) + p.putUvarint(w, t.num) + p.putUvarint(w, t.size) + p.putBytes(w, t.min) + p.putBytes(w, t.max) + } + return p.err +} + +func (p *sessionRecord) readUvarint(r io.ByteReader) uint64 { + if p.err != nil { + return 0 + } + x, err := binary.ReadUvarint(r) + if err != nil { + if err == io.EOF { + p.err = errCorruptManifest + } else { + p.err = err + } + return 0 + } + return x +} + +func (p *sessionRecord) readBytes(r byteReader) []byte { + if p.err != nil { + return nil + } + n := p.readUvarint(r) + if p.err != nil { + return nil + } + x := make([]byte, n) + _, p.err = io.ReadFull(r, x) + if p.err != nil { + if p.err == io.EOF { + p.err = errCorruptManifest + } + return nil + } + return x +} + +func (p *sessionRecord) readLevel(r io.ByteReader) int { + if p.err != nil { + return 0 + } + x := p.readUvarint(r) + if p.err != nil { + return 0 + } + if x >= kNumLevels { + p.err = errCorruptManifest + return 0 + } + return int(x) +} + +func (p *sessionRecord) decode(r io.Reader) error { + br, ok := r.(byteReader) + if !ok { + br = bufio.NewReader(r) + } + p.err = nil + for p.err == nil { + rec, err := binary.ReadUvarint(br) + if err != nil { + if err == io.EOF { + err = nil + } + return err + } + switch rec { + case recComparer: + x := p.readBytes(br) + if p.err == nil { + p.setComparer(string(x)) + } + case recJournalNum: + x := p.readUvarint(br) + if p.err == nil { + p.setJournalNum(x) + } + case recPrevJournalNum: + x := p.readUvarint(br) + if p.err == nil { + p.setPrevJournalNum(x) + } + case recNextNum: + x := p.readUvarint(br) + if p.err == nil { + p.setNextNum(x) + } + case recSeq: + x := p.readUvarint(br) + if p.err == nil { + p.setSeq(x) + } + case recCompactionPointer: + level := p.readLevel(br) + key := p.readBytes(br) + if p.err == nil { + p.addCompactionPointer(level, iKey(key)) + } + case recNewTable: + level := p.readLevel(br) + num := p.readUvarint(br) + size := p.readUvarint(br) + min := p.readBytes(br) + max := p.readBytes(br) + if p.err == nil { + p.addTable(level, num, size, min, max) + } + case recDeletedTable: + level := p.readLevel(br) + num := p.readUvarint(br) + if p.err == nil { + p.deleteTable(level, num) + } + } + } + + return p.err +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record_test.go new file mode 100644 index 000000000..029fabfe6 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record_test.go @@ -0,0 +1,62 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "bytes" + "testing" +) + +func decodeEncode(v *sessionRecord) (res bool, err error) { + b := new(bytes.Buffer) + err = v.encode(b) + if err != nil { + return + } + v2 := new(sessionRecord) + err = v.decode(b) + if err != nil { + return + } + b2 := new(bytes.Buffer) + err = v2.encode(b2) + if err != nil { + return + } + return bytes.Equal(b.Bytes(), b2.Bytes()), nil +} + +func TestSessionRecord_EncodeDecode(t *testing.T) { + big := uint64(1) << 50 + v := new(sessionRecord) + i := uint64(0) + test := func() { + res, err := decodeEncode(v) + if err != nil { + t.Fatalf("error when testing encode/decode sessionRecord: %v", err) + } + if !res { + t.Error("encode/decode test failed at iteration:", i) + } + } + + for ; i < 4; i++ { + test() + v.addTable(3, big+300+i, big+400+i, + newIKey([]byte("foo"), big+500+1, tVal), + newIKey([]byte("zoo"), big+600+1, tDel)) + v.deleteTable(4, big+700+i) + v.addCompactionPointer(int(i), newIKey([]byte("x"), big+900+1, tVal)) + } + + v.setComparer("foo") + v.setJournalNum(big + 100) + v.setPrevJournalNum(big + 99) + v.setNextNum(big + 200) + v.setSeq(big + 1000) + test() +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_util.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_util.go new file mode 100644 index 000000000..bf412b030 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_util.go @@ -0,0 +1,253 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "fmt" + "sync/atomic" + + "github.com/syndtr/goleveldb/leveldb/journal" + "github.com/syndtr/goleveldb/leveldb/storage" +) + +// logging + +type dropper struct { + s *session + file storage.File +} + +func (d dropper) Drop(err error) { + if e, ok := err.(journal.DroppedError); ok { + d.s.logf("journal@drop %s-%d S·%s %q", d.file.Type(), d.file.Num(), shortenb(e.Size), e.Reason) + } else { + d.s.logf("journal@drop %s-%d %q", d.file.Type(), d.file.Num(), err) + } +} + +func (s *session) log(v ...interface{}) { + s.stor.Log(fmt.Sprint(v...)) +} + +func (s *session) logf(format string, v ...interface{}) { + s.stor.Log(fmt.Sprintf(format, v...)) +} + +// file utils + +func (s *session) getJournalFile(num uint64) storage.File { + return s.stor.GetFile(num, storage.TypeJournal) +} + +func (s *session) getTableFile(num uint64) storage.File { + return s.stor.GetFile(num, storage.TypeTable) +} + +func (s *session) getFiles(t storage.FileType) ([]storage.File, error) { + return s.stor.GetFiles(t) +} + +func (s *session) newTemp() storage.File { + num := atomic.AddUint64(&s.stTempFileNum, 1) - 1 + return s.stor.GetFile(num, storage.TypeTemp) +} + +// session state + +// Get current version. +func (s *session) version() *version { + s.vmu.Lock() + defer s.vmu.Unlock() + s.stVersion.ref++ + return s.stVersion +} + +// Get current version; no barrier. +func (s *session) version_NB() *version { + return s.stVersion +} + +// Set current version to v. +func (s *session) setVersion(v *version) { + s.vmu.Lock() + v.ref = 1 + if old := s.stVersion; old != nil { + v.ref++ + old.next = v + old.release_NB() + } + s.stVersion = v + s.vmu.Unlock() +} + +// Get current unused file number. +func (s *session) fileNum() uint64 { + return atomic.LoadUint64(&s.stFileNum) +} + +// Get current unused file number to num. +func (s *session) setFileNum(num uint64) { + atomic.StoreUint64(&s.stFileNum, num) +} + +// Mark file number as used. +func (s *session) markFileNum(num uint64) { + num += 1 + for { + old, x := s.stFileNum, num + if old > x { + x = old + } + if atomic.CompareAndSwapUint64(&s.stFileNum, old, x) { + break + } + } +} + +// Allocate a file number. +func (s *session) allocFileNum() (num uint64) { + return atomic.AddUint64(&s.stFileNum, 1) - 1 +} + +// Reuse given file number. +func (s *session) reuseFileNum(num uint64) { + for { + old, x := s.stFileNum, num + if old != x+1 { + x = old + } + if atomic.CompareAndSwapUint64(&s.stFileNum, old, x) { + break + } + } +} + +// manifest related utils + +// Fill given session record obj with current states; need external +// synchronization. +func (s *session) fillRecord(r *sessionRecord, snapshot bool) { + r.setNextNum(s.fileNum()) + + if snapshot { + if !r.has(recJournalNum) { + r.setJournalNum(s.stJournalNum) + } + + if !r.has(recSeq) { + r.setSeq(s.stSeq) + } + + for level, ik := range s.stCPtrs { + if ik != nil { + r.addCompactionPointer(level, ik) + } + } + + r.setComparer(s.icmp.uName()) + } +} + +// Mark if record has been commited, this will update session state; +// need external synchronization. +func (s *session) recordCommited(r *sessionRecord) { + if r.has(recJournalNum) { + s.stJournalNum = r.journalNum + } + + if r.has(recPrevJournalNum) { + s.stPrevJournalNum = r.prevJournalNum + } + + if r.has(recSeq) { + s.stSeq = r.seq + } + + for _, p := range r.compactionPointers { + s.stCPtrs[p.level] = iKey(p.key) + } +} + +// Create a new manifest file; need external synchronization. +func (s *session) newManifest(rec *sessionRecord, v *version) (err error) { + num := s.allocFileNum() + file := s.stor.GetFile(num, storage.TypeManifest) + writer, err := file.Create() + if err != nil { + return + } + jw := journal.NewWriter(writer) + + if v == nil { + v = s.version_NB() + } + if rec == nil { + rec = new(sessionRecord) + } + s.fillRecord(rec, true) + v.fillRecord(rec) + + defer func() { + if err == nil { + s.recordCommited(rec) + if s.manifest != nil { + s.manifest.Close() + } + if s.manifestWriter != nil { + s.manifestWriter.Close() + } + if s.manifestFile != nil { + s.manifestFile.Remove() + } + s.manifestFile = file + s.manifestWriter = writer + s.manifest = jw + } else { + writer.Close() + file.Remove() + s.reuseFileNum(num) + } + }() + + w, err := jw.Next() + if err != nil { + return + } + err = rec.encode(w) + if err != nil { + return + } + err = jw.Flush() + if err != nil { + return + } + err = s.stor.SetManifest(file) + return +} + +// Flush record to disk. +func (s *session) flushManifest(rec *sessionRecord) (err error) { + s.fillRecord(rec, false) + w, err := s.manifest.Next() + if err != nil { + return + } + err = rec.encode(w) + if err != nil { + return + } + err = s.manifest.Flush() + if err != nil { + return + } + err = s.manifestWriter.Sync() + if err != nil { + return + } + s.recordCommited(rec) + return +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go new file mode 100644 index 000000000..75439f6db --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go @@ -0,0 +1,534 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reservefs. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package storage + +import ( + "errors" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "runtime" + "strconv" + "strings" + "sync" + "time" + + "github.com/syndtr/goleveldb/leveldb/util" +) + +var errFileOpen = errors.New("leveldb/storage: file still open") + +type fileLock interface { + release() error +} + +type fileStorageLock struct { + fs *fileStorage +} + +func (lock *fileStorageLock) Release() { + fs := lock.fs + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.slock == lock { + fs.slock = nil + } + return +} + +// fileStorage is a file-system backed storage. +type fileStorage struct { + path string + + mu sync.Mutex + flock fileLock + slock *fileStorageLock + logw *os.File + buf []byte + // Opened file counter; if open < 0 means closed. + open int + day int +} + +// OpenFile returns a new filesytem-backed storage implementation with the given +// path. This also hold a file lock, so any subsequent attempt to open the same +// path will fail. +// +// The storage must be closed after use, by calling Close method. +func OpenFile(path string) (Storage, error) { + if err := os.MkdirAll(path, 0755); err != nil { + return nil, err + } + + flock, err := newFileLock(filepath.Join(path, "LOCK")) + if err != nil { + return nil, err + } + + defer func() { + if err != nil { + flock.release() + } + }() + + rename(filepath.Join(path, "LOG"), filepath.Join(path, "LOG.old")) + logw, err := os.OpenFile(filepath.Join(path, "LOG"), os.O_WRONLY|os.O_CREATE, 0644) + if err != nil { + return nil, err + } + + fs := &fileStorage{path: path, flock: flock, logw: logw} + runtime.SetFinalizer(fs, (*fileStorage).Close) + return fs, nil +} + +func (fs *fileStorage) Lock() (util.Releaser, error) { + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return nil, ErrClosed + } + if fs.slock != nil { + return nil, ErrLocked + } + fs.slock = &fileStorageLock{fs: fs} + return fs.slock, nil +} + +func itoa(buf []byte, i int, wid int) []byte { + var u uint = uint(i) + if u == 0 && wid <= 1 { + return append(buf, '0') + } + + // Assemble decimal in reverse order. + var b [32]byte + bp := len(b) + for ; u > 0 || wid > 0; u /= 10 { + bp-- + wid-- + b[bp] = byte(u%10) + '0' + } + return append(buf, b[bp:]...) +} + +func (fs *fileStorage) printDay(t time.Time) { + if fs.day == t.Day() { + return + } + fs.day = t.Day() + fs.logw.Write([]byte("=============== " + t.Format("Jan 2, 2006 (MST)") + " ===============\n")) +} + +func (fs *fileStorage) doLog(t time.Time, str string) { + fs.printDay(t) + hour, min, sec := t.Clock() + msec := t.Nanosecond() / 1e3 + // time + fs.buf = itoa(fs.buf[:0], hour, 2) + fs.buf = append(fs.buf, ':') + fs.buf = itoa(fs.buf, min, 2) + fs.buf = append(fs.buf, ':') + fs.buf = itoa(fs.buf, sec, 2) + fs.buf = append(fs.buf, '.') + fs.buf = itoa(fs.buf, msec, 6) + fs.buf = append(fs.buf, ' ') + // write + fs.buf = append(fs.buf, []byte(str)...) + fs.buf = append(fs.buf, '\n') + fs.logw.Write(fs.buf) +} + +func (fs *fileStorage) Log(str string) { + t := time.Now() + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return + } + fs.doLog(t, str) +} + +func (fs *fileStorage) log(str string) { + fs.doLog(time.Now(), str) +} + +func (fs *fileStorage) GetFile(num uint64, t FileType) File { + return &file{fs: fs, num: num, t: t} +} + +func (fs *fileStorage) GetFiles(t FileType) (ff []File, err error) { + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return nil, ErrClosed + } + dir, err := os.Open(fs.path) + if err != nil { + return + } + fnn, err := dir.Readdirnames(0) + // Close the dir first before checking for Readdirnames error. + if err := dir.Close(); err != nil { + fs.log(fmt.Sprintf("close dir: %v", err)) + } + if err != nil { + return + } + f := &file{fs: fs} + for _, fn := range fnn { + if f.parse(fn) && (f.t&t) != 0 { + ff = append(ff, f) + f = &file{fs: fs} + } + } + return +} + +func (fs *fileStorage) GetManifest() (f File, err error) { + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return nil, ErrClosed + } + dir, err := os.Open(fs.path) + if err != nil { + return + } + fnn, err := dir.Readdirnames(0) + // Close the dir first before checking for Readdirnames error. + if err := dir.Close(); err != nil { + fs.log(fmt.Sprintf("close dir: %v", err)) + } + if err != nil { + return + } + // Find latest CURRENT file. + var rem []string + var pend bool + var cerr error + for _, fn := range fnn { + if strings.HasPrefix(fn, "CURRENT") { + pend1 := len(fn) > 7 + // Make sure it is valid name for a CURRENT file, otherwise skip it. + if pend1 { + if fn[7] != '.' || len(fn) < 9 { + fs.log(fmt.Sprintf("skipping %s: invalid file name", fn)) + continue + } + if _, e1 := strconv.ParseUint(fn[7:], 10, 0); e1 != nil { + fs.log(fmt.Sprintf("skipping %s: invalid file num: %v", fn, e1)) + continue + } + } + path := filepath.Join(fs.path, fn) + r, e1 := os.OpenFile(path, os.O_RDONLY, 0) + if e1 != nil { + return nil, e1 + } + b, e1 := ioutil.ReadAll(r) + if e1 != nil { + r.Close() + return nil, e1 + } + f1 := &file{fs: fs} + if len(b) < 1 || b[len(b)-1] != '\n' || !f1.parse(string(b[:len(b)-1])) { + fs.log(fmt.Sprintf("skipping %s: corrupted or incomplete", fn)) + if pend1 { + rem = append(rem, fn) + } + if !pend1 || cerr == nil { + cerr = fmt.Errorf("leveldb/storage: corrupted or incomplete %s file", fn) + } + } else if f != nil && f1.Num() < f.Num() { + fs.log(fmt.Sprintf("skipping %s: obsolete", fn)) + if pend1 { + rem = append(rem, fn) + } + } else { + f = f1 + pend = pend1 + } + if err := r.Close(); err != nil { + fs.log(fmt.Sprintf("close %s: %v", fn, err)) + } + } + } + // Don't remove any files if there is no valid CURRENT file. + if f == nil { + if cerr != nil { + err = cerr + } else { + err = os.ErrNotExist + } + return + } + // Rename pending CURRENT file to an effective CURRENT. + if pend { + path := fmt.Sprintf("%s.%d", filepath.Join(fs.path, "CURRENT"), f.Num()) + if err := rename(path, filepath.Join(fs.path, "CURRENT")); err != nil { + fs.log(fmt.Sprintf("CURRENT.%d -> CURRENT: %v", f.Num(), err)) + } + } + // Remove obsolete or incomplete pending CURRENT files. + for _, fn := range rem { + path := filepath.Join(fs.path, fn) + if err := os.Remove(path); err != nil { + fs.log(fmt.Sprintf("remove %s: %v", fn, err)) + } + } + return +} + +func (fs *fileStorage) SetManifest(f File) (err error) { + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return ErrClosed + } + f2, ok := f.(*file) + if !ok || f2.t != TypeManifest { + return ErrInvalidFile + } + defer func() { + if err != nil { + fs.log(fmt.Sprintf("CURRENT: %v", err)) + } + }() + path := fmt.Sprintf("%s.%d", filepath.Join(fs.path, "CURRENT"), f2.Num()) + w, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + return err + } + _, err = fmt.Fprintln(w, f2.name()) + // Close the file first. + if err := w.Close(); err != nil { + fs.log(fmt.Sprintf("close CURRENT.%d: %v", f2.num, err)) + } + if err != nil { + return err + } + return rename(path, filepath.Join(fs.path, "CURRENT")) +} + +func (fs *fileStorage) Close() error { + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return ErrClosed + } + // Clear the finalizer. + runtime.SetFinalizer(fs, nil) + + if fs.open > 0 { + fs.log(fmt.Sprintf("refuse to close, %d files still open", fs.open)) + return fmt.Errorf("leveldb/storage: cannot close, %d files still open", fs.open) + } + fs.open = -1 + e1 := fs.logw.Close() + err := fs.flock.release() + if err == nil { + err = e1 + } + return err +} + +type fileWrap struct { + *os.File + f *file +} + +func (fw fileWrap) Sync() error { + if err := fw.File.Sync(); err != nil { + return err + } + if fw.f.Type() == TypeManifest { + // Also sync parent directory if file type is manifest. + // See: https://code.google.com/p/leveldb/issues/detail?id=190. + if err := syncDir(fw.f.fs.path); err != nil { + return err + } + } + return nil +} + +func (fw fileWrap) Close() error { + f := fw.f + f.fs.mu.Lock() + defer f.fs.mu.Unlock() + if !f.open { + return ErrClosed + } + f.open = false + f.fs.open-- + err := fw.File.Close() + if err != nil { + f.fs.log(fmt.Sprintf("close %s.%d: %v", f.Type(), f.Num(), err)) + } + return err +} + +type file struct { + fs *fileStorage + num uint64 + t FileType + open bool +} + +func (f *file) Open() (Reader, error) { + f.fs.mu.Lock() + defer f.fs.mu.Unlock() + if f.fs.open < 0 { + return nil, ErrClosed + } + if f.open { + return nil, errFileOpen + } + of, err := os.OpenFile(f.path(), os.O_RDONLY, 0) + if err != nil { + if f.hasOldName() && os.IsNotExist(err) { + of, err = os.OpenFile(f.oldPath(), os.O_RDONLY, 0) + if err == nil { + goto ok + } + } + return nil, err + } +ok: + f.open = true + f.fs.open++ + return fileWrap{of, f}, nil +} + +func (f *file) Create() (Writer, error) { + f.fs.mu.Lock() + defer f.fs.mu.Unlock() + if f.fs.open < 0 { + return nil, ErrClosed + } + if f.open { + return nil, errFileOpen + } + of, err := os.OpenFile(f.path(), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + return nil, err + } + f.open = true + f.fs.open++ + return fileWrap{of, f}, nil +} + +func (f *file) Replace(newfile File) error { + f.fs.mu.Lock() + defer f.fs.mu.Unlock() + if f.fs.open < 0 { + return ErrClosed + } + newfile2, ok := newfile.(*file) + if !ok { + return ErrInvalidFile + } + if f.open || newfile2.open { + return errFileOpen + } + return rename(newfile2.path(), f.path()) +} + +func (f *file) Type() FileType { + return f.t +} + +func (f *file) Num() uint64 { + return f.num +} + +func (f *file) Remove() error { + f.fs.mu.Lock() + defer f.fs.mu.Unlock() + if f.fs.open < 0 { + return ErrClosed + } + if f.open { + return errFileOpen + } + err := os.Remove(f.path()) + if err != nil { + f.fs.log(fmt.Sprintf("remove %s.%d: %v", f.Type(), f.Num(), err)) + } + // Also try remove file with old name, just in case. + if f.hasOldName() { + if e1 := os.Remove(f.oldPath()); !os.IsNotExist(e1) { + f.fs.log(fmt.Sprintf("remove %s.%d: %v (old name)", f.Type(), f.Num(), err)) + err = e1 + } + } + return err +} + +func (f *file) hasOldName() bool { + return f.t == TypeTable +} + +func (f *file) oldName() string { + switch f.t { + case TypeTable: + return fmt.Sprintf("%06d.sst", f.num) + } + return f.name() +} + +func (f *file) oldPath() string { + return filepath.Join(f.fs.path, f.oldName()) +} + +func (f *file) name() string { + switch f.t { + case TypeManifest: + return fmt.Sprintf("MANIFEST-%06d", f.num) + case TypeJournal: + return fmt.Sprintf("%06d.log", f.num) + case TypeTable: + return fmt.Sprintf("%06d.ldb", f.num) + case TypeTemp: + return fmt.Sprintf("%06d.tmp", f.num) + default: + panic("invalid file type") + } +} + +func (f *file) path() string { + return filepath.Join(f.fs.path, f.name()) +} + +func (f *file) parse(name string) bool { + var num uint64 + var tail string + _, err := fmt.Sscanf(name, "%d.%s", &num, &tail) + if err == nil { + switch tail { + case "log": + f.t = TypeJournal + case "ldb", "sst": + f.t = TypeTable + case "tmp": + f.t = TypeTemp + default: + return false + } + f.num = num + return true + } + n, _ := fmt.Sscanf(name, "MANIFEST-%d%s", &num, &tail) + if n == 1 { + f.t = TypeManifest + f.num = num + return true + } + + return false +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go new file mode 100644 index 000000000..42940d769 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go @@ -0,0 +1,52 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package storage + +import ( + "os" + "path/filepath" +) + +type plan9FileLock struct { + f *os.File +} + +func (fl *plan9FileLock) release() error { + return fl.f.Close() +} + +func newFileLock(path string) (fl fileLock, err error) { + f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, os.ModeExclusive|0644) + if err != nil { + return + } + fl = &plan9FileLock{f: f} + return +} + +func rename(oldpath, newpath string) error { + if _, err := os.Stat(newpath); err == nil { + if err := os.Remove(newpath); err != nil { + return err + } + } + + _, fname := filepath.Split(newpath) + return os.Rename(oldpath, fname) +} + +func syncDir(name string) error { + f, err := os.Open(name) + if err != nil { + return err + } + defer f.Close() + if err := f.Sync(); err != nil { + return err + } + return nil +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_test.go new file mode 100644 index 000000000..92abcbb7d --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_test.go @@ -0,0 +1,142 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package storage + +import ( + "fmt" + "os" + "path/filepath" + "testing" +) + +var cases = []struct { + oldName []string + name string + ftype FileType + num uint64 +}{ + {nil, "000100.log", TypeJournal, 100}, + {nil, "000000.log", TypeJournal, 0}, + {[]string{"000000.sst"}, "000000.ldb", TypeTable, 0}, + {nil, "MANIFEST-000002", TypeManifest, 2}, + {nil, "MANIFEST-000007", TypeManifest, 7}, + {nil, "18446744073709551615.log", TypeJournal, 18446744073709551615}, + {nil, "000100.tmp", TypeTemp, 100}, +} + +var invalidCases = []string{ + "", + "foo", + "foo-dx-100.log", + ".log", + "", + "manifest", + "CURREN", + "CURRENTX", + "MANIFES", + "MANIFEST", + "MANIFEST-", + "XMANIFEST-3", + "MANIFEST-3x", + "LOC", + "LOCKx", + "LO", + "LOGx", + "18446744073709551616.log", + "184467440737095516150.log", + "100", + "100.", + "100.lop", +} + +func TestFileStorage_CreateFileName(t *testing.T) { + for _, c := range cases { + f := &file{num: c.num, t: c.ftype} + if f.name() != c.name { + t.Errorf("invalid filename got '%s', want '%s'", f.name(), c.name) + } + } +} + +func TestFileStorage_ParseFileName(t *testing.T) { + for _, c := range cases { + for _, name := range append([]string{c.name}, c.oldName...) { + f := new(file) + if !f.parse(name) { + t.Errorf("cannot parse filename '%s'", name) + continue + } + if f.Type() != c.ftype { + t.Errorf("filename '%s' invalid type got '%d', want '%d'", name, f.Type(), c.ftype) + } + if f.Num() != c.num { + t.Errorf("filename '%s' invalid number got '%d', want '%d'", name, f.Num(), c.num) + } + } + } +} + +func TestFileStorage_InvalidFileName(t *testing.T) { + for _, name := range invalidCases { + f := new(file) + if f.parse(name) { + t.Errorf("filename '%s' should be invalid", name) + } + } +} + +func TestFileStorage_Locking(t *testing.T) { + path := filepath.Join(os.TempDir(), fmt.Sprintf("goleveldbtestfd-%d", os.Getuid())) + + _, err := os.Stat(path) + if err == nil { + err = os.RemoveAll(path) + if err != nil { + t.Fatal("RemoveAll: got error: ", err) + } + } + + p1, err := OpenFile(path) + if err != nil { + t.Fatal("OpenFile(1): got error: ", err) + } + + defer os.RemoveAll(path) + + p2, err := OpenFile(path) + if err != nil { + t.Logf("OpenFile(2): got error: %s (expected)", err) + } else { + p2.Close() + p1.Close() + t.Fatal("OpenFile(2): expect error") + } + + p1.Close() + + p3, err := OpenFile(path) + if err != nil { + t.Fatal("OpenFile(3): got error: ", err) + } + defer p3.Close() + + l, err := p3.Lock() + if err != nil { + t.Fatal("storage lock failed(1): ", err) + } + _, err = p3.Lock() + if err == nil { + t.Fatal("expect error for second storage lock attempt") + } else { + t.Logf("storage lock got error: %s (expected)", err) + } + l.Release() + _, err = p3.Lock() + if err != nil { + t.Fatal("storage lock failed(2): ", err) + } +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go new file mode 100644 index 000000000..73499afc8 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go @@ -0,0 +1,63 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// +build darwin freebsd linux netbsd openbsd + +package storage + +import ( + "os" + "syscall" +) + +type unixFileLock struct { + f *os.File +} + +func (fl *unixFileLock) release() error { + if err := setFileLock(fl.f, false); err != nil { + return err + } + return fl.f.Close() +} + +func newFileLock(path string) (fl fileLock, err error) { + f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0644) + if err != nil { + return + } + err = setFileLock(f, true) + if err != nil { + f.Close() + return + } + fl = &unixFileLock{f: f} + return +} + +func setFileLock(f *os.File, lock bool) error { + how := syscall.LOCK_UN + if lock { + how = syscall.LOCK_EX + } + return syscall.Flock(int(f.Fd()), how|syscall.LOCK_NB) +} + +func rename(oldpath, newpath string) error { + return os.Rename(oldpath, newpath) +} + +func syncDir(name string) error { + f, err := os.Open(name) + if err != nil { + return err + } + defer f.Close() + if err := f.Sync(); err != nil { + return err + } + return nil +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go new file mode 100644 index 000000000..50c3c454e --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go @@ -0,0 +1,69 @@ +// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package storage + +import ( + "syscall" + "unsafe" +) + +var ( + modkernel32 = syscall.NewLazyDLL("kernel32.dll") + + procMoveFileExW = modkernel32.NewProc("MoveFileExW") +) + +const ( + _MOVEFILE_REPLACE_EXISTING = 1 +) + +type windowsFileLock struct { + fd syscall.Handle +} + +func (fl *windowsFileLock) release() error { + return syscall.Close(fl.fd) +} + +func newFileLock(path string) (fl fileLock, err error) { + pathp, err := syscall.UTF16PtrFromString(path) + if err != nil { + return + } + fd, err := syscall.CreateFile(pathp, syscall.GENERIC_READ|syscall.GENERIC_WRITE, 0, nil, syscall.CREATE_ALWAYS, syscall.FILE_ATTRIBUTE_NORMAL, 0) + if err != nil { + return + } + fl = &windowsFileLock{fd: fd} + return +} + +func moveFileEx(from *uint16, to *uint16, flags uint32) error { + r1, _, e1 := syscall.Syscall(procMoveFileExW.Addr(), 3, uintptr(unsafe.Pointer(from)), uintptr(unsafe.Pointer(to)), uintptr(flags)) + if r1 == 0 { + if e1 != 0 { + return error(e1) + } else { + return syscall.EINVAL + } + } + return nil +} + +func rename(oldpath, newpath string) error { + from, err := syscall.UTF16PtrFromString(oldpath) + if err != nil { + return err + } + to, err := syscall.UTF16PtrFromString(newpath) + if err != nil { + return err + } + return moveFileEx(from, to, _MOVEFILE_REPLACE_EXISTING) +} + +func syncDir(name string) error { return nil } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go new file mode 100644 index 000000000..fc1c8165d --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go @@ -0,0 +1,203 @@ +// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package storage + +import ( + "bytes" + "os" + "sync" + + "github.com/syndtr/goleveldb/leveldb/util" +) + +const typeShift = 3 + +type memStorageLock struct { + ms *memStorage +} + +func (lock *memStorageLock) Release() { + ms := lock.ms + ms.mu.Lock() + defer ms.mu.Unlock() + if ms.slock == lock { + ms.slock = nil + } + return +} + +// memStorage is a memory-backed storage. +type memStorage struct { + mu sync.Mutex + slock *memStorageLock + files map[uint64]*memFile + manifest *memFilePtr +} + +// NewMemStorage returns a new memory-backed storage implementation. +func NewMemStorage() Storage { + return &memStorage{ + files: make(map[uint64]*memFile), + } +} + +func (ms *memStorage) Lock() (util.Releaser, error) { + ms.mu.Lock() + defer ms.mu.Unlock() + if ms.slock != nil { + return nil, ErrLocked + } + ms.slock = &memStorageLock{ms: ms} + return ms.slock, nil +} + +func (*memStorage) Log(str string) {} + +func (ms *memStorage) GetFile(num uint64, t FileType) File { + return &memFilePtr{ms: ms, num: num, t: t} +} + +func (ms *memStorage) GetFiles(t FileType) ([]File, error) { + ms.mu.Lock() + var ff []File + for x, _ := range ms.files { + num, mt := x>>typeShift, FileType(x)&TypeAll + if mt&t == 0 { + continue + } + ff = append(ff, &memFilePtr{ms: ms, num: num, t: mt}) + } + ms.mu.Unlock() + return ff, nil +} + +func (ms *memStorage) GetManifest() (File, error) { + ms.mu.Lock() + defer ms.mu.Unlock() + if ms.manifest == nil { + return nil, os.ErrNotExist + } + return ms.manifest, nil +} + +func (ms *memStorage) SetManifest(f File) error { + fm, ok := f.(*memFilePtr) + if !ok || fm.t != TypeManifest { + return ErrInvalidFile + } + ms.mu.Lock() + ms.manifest = fm + ms.mu.Unlock() + return nil +} + +func (*memStorage) Close() error { return nil } + +type memReader struct { + *bytes.Reader + m *memFile +} + +func (mr *memReader) Close() error { + return mr.m.Close() +} + +type memFile struct { + bytes.Buffer + ms *memStorage + open bool +} + +func (*memFile) Sync() error { return nil } +func (m *memFile) Close() error { + m.ms.mu.Lock() + m.open = false + m.ms.mu.Unlock() + return nil +} + +type memFilePtr struct { + ms *memStorage + num uint64 + t FileType +} + +func (p *memFilePtr) x() uint64 { + return p.Num()<<typeShift | uint64(p.Type()) +} + +func (p *memFilePtr) Open() (Reader, error) { + ms := p.ms + ms.mu.Lock() + defer ms.mu.Unlock() + if m, exist := ms.files[p.x()]; exist { + if m.open { + return nil, errFileOpen + } + m.open = true + return &memReader{Reader: bytes.NewReader(m.Bytes()), m: m}, nil + } + return nil, os.ErrNotExist +} + +func (p *memFilePtr) Create() (Writer, error) { + ms := p.ms + ms.mu.Lock() + defer ms.mu.Unlock() + m, exist := ms.files[p.x()] + if exist { + if m.open { + return nil, errFileOpen + } + m.Reset() + } else { + m = &memFile{ms: ms} + ms.files[p.x()] = m + } + m.open = true + return m, nil +} + +func (p *memFilePtr) Replace(newfile File) error { + p1, ok := newfile.(*memFilePtr) + if !ok { + return ErrInvalidFile + } + ms := p.ms + ms.mu.Lock() + defer ms.mu.Unlock() + m1, exist := ms.files[p1.x()] + if !exist { + return os.ErrNotExist + } + m0, exist := ms.files[p.x()] + if (exist && m0.open) || m1.open { + return errFileOpen + } + delete(ms.files, p1.x()) + ms.files[p.x()] = m1 + return nil +} + +func (p *memFilePtr) Type() FileType { + return p.t +} + +func (p *memFilePtr) Num() uint64 { + return p.num +} + +func (p *memFilePtr) Remove() error { + ms := p.ms + ms.mu.Lock() + defer ms.mu.Unlock() + if _, exist := ms.files[p.x()]; exist { + delete(ms.files, p.x()) + return nil + } + return os.ErrNotExist +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/mem_storage_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/mem_storage_test.go new file mode 100644 index 000000000..23bb074b4 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/mem_storage_test.go @@ -0,0 +1,66 @@ +// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package storage + +import ( + "bytes" + "testing" +) + +func TestMemStorage(t *testing.T) { + m := NewMemStorage() + + l, err := m.Lock() + if err != nil { + t.Fatal("storage lock failed(1): ", err) + } + _, err = m.Lock() + if err == nil { + t.Fatal("expect error for second storage lock attempt") + } else { + t.Logf("storage lock got error: %s (expected)", err) + } + l.Release() + _, err = m.Lock() + if err != nil { + t.Fatal("storage lock failed(2): ", err) + } + + f := m.GetFile(1, TypeTable) + if f.Num() != 1 && f.Type() != TypeTable { + t.Fatal("invalid file number and type") + } + w, _ := f.Create() + w.Write([]byte("abc")) + w.Close() + if ff, _ := m.GetFiles(TypeAll); len(ff) != 1 { + t.Fatal("invalid GetFiles len") + } + buf := new(bytes.Buffer) + r, err := f.Open() + if err != nil { + t.Fatal("Open: got error: ", err) + } + buf.ReadFrom(r) + r.Close() + if got := buf.String(); got != "abc" { + t.Fatalf("Read: invalid value, want=abc got=%s", got) + } + if _, err := f.Open(); err != nil { + t.Fatal("Open: got error: ", err) + } + if _, err := m.GetFile(1, TypeTable).Open(); err == nil { + t.Fatal("expecting error") + } + f.Remove() + if ff, _ := m.GetFiles(TypeAll); len(ff) != 0 { + t.Fatal("invalid GetFiles len", len(ff)) + } + if _, err := f.Open(); err == nil { + t.Fatal("expecting error") + } +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/storage.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/storage.go new file mode 100644 index 000000000..de5694888 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage/storage.go @@ -0,0 +1,127 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package storage provides storage abstraction for LevelDB. +package storage + +import ( + "errors" + "fmt" + "io" + + "github.com/syndtr/goleveldb/leveldb/util" +) + +type FileType uint32 + +const ( + TypeManifest FileType = 1 << iota + TypeJournal + TypeTable + TypeTemp + + TypeAll = TypeManifest | TypeJournal | TypeTable | TypeTemp +) + +func (t FileType) String() string { + switch t { + case TypeManifest: + return "manifest" + case TypeJournal: + return "journal" + case TypeTable: + return "table" + case TypeTemp: + return "temp" + } + return fmt.Sprintf("<unknown:%d>", t) +} + +var ( + ErrInvalidFile = errors.New("leveldb/storage: invalid file for argument") + ErrLocked = errors.New("leveldb/storage: already locked") + ErrClosed = errors.New("leveldb/storage: closed") +) + +// Syncer is the interface that wraps basic Sync method. +type Syncer interface { + // Sync commits the current contents of the file to stable storage. + Sync() error +} + +// Reader is the interface that groups the basic Read, Seek, ReadAt and Close +// methods. +type Reader interface { + io.ReadSeeker + io.ReaderAt + io.Closer +} + +// Writer is the interface that groups the basic Write, Sync and Close +// methods. +type Writer interface { + io.WriteCloser + Syncer +} + +// File is the file. +type File interface { + // Open opens the file for read. Returns os.ErrNotExist error + // if the file does not exist. + // Returns ErrClosed if the underlying storage is closed. + Open() (r Reader, err error) + + // Create creates the file for writting. Truncate the file if + // already exist. + // Returns ErrClosed if the underlying storage is closed. + Create() (w Writer, err error) + + // Replace replaces file with newfile. + // Returns ErrClosed if the underlying storage is closed. + Replace(newfile File) error + + // Type returns the file type + Type() FileType + + // Num returns the file number. + Num() uint64 + + // Remove removes the file. + // Returns ErrClosed if the underlying storage is closed. + Remove() error +} + +// Storage is the storage. +type Storage interface { + // Lock locks the storage. Any subsequent attempt to call Lock will fail + // until the last lock released. + // After use the caller should call the Release method. + Lock() (l util.Releaser, err error) + + // Log logs a string. This is used for logging. An implementation + // may write to a file, stdout or simply do nothing. + Log(str string) + + // GetFile returns a file for the given number and type. GetFile will never + // returns nil, even if the underlying storage is closed. + GetFile(num uint64, t FileType) File + + // GetFiles returns a slice of files that match the given file types. + // The file types may be OR'ed together. + GetFiles(t FileType) ([]File, error) + + // GetManifest returns a manifest file. Returns os.ErrNotExist if manifest + // file does not exist. + GetManifest() (File, error) + + // SetManifest sets the given file as manifest file. The given file should + // be a manifest file type or error will be returned. + SetManifest(f File) error + + // Close closes the storage. It is valid to call Close multiple times. + // Other methods should not be called after the storage has been closed. + Close() error +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage_test.go new file mode 100644 index 000000000..27e76d707 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage_test.go @@ -0,0 +1,459 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENE file. + +package leveldb + +import ( + "errors" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "sync" + "testing" + + "github.com/syndtr/goleveldb/leveldb/storage" + "github.com/syndtr/goleveldb/leveldb/util" +) + +const typeShift = 4 + +var ( + tsErrInvalidFile = errors.New("leveldb.testStorage: invalid file for argument") + tsErrFileOpen = errors.New("leveldb.testStorage: file still open") +) + +var ( + tsFSEnv = os.Getenv("GOLEVELDB_USEFS") + tsKeepFS = tsFSEnv == "2" + tsFS = tsKeepFS || tsFSEnv == "" || tsFSEnv == "1" + tsMU = &sync.Mutex{} + tsNum = 0 +) + +type tsLock struct { + ts *testStorage + r util.Releaser +} + +func (l tsLock) Release() { + l.r.Release() + l.ts.t.Log("I: storage lock released") +} + +type tsReader struct { + tf tsFile + storage.Reader +} + +func (tr tsReader) Read(b []byte) (n int, err error) { + ts := tr.tf.ts + ts.countRead(tr.tf.Type()) + n, err = tr.Reader.Read(b) + if err != nil && err != io.EOF { + ts.t.Errorf("E: read error, num=%d type=%v n=%d: %v", tr.tf.Num(), tr.tf.Type(), n, err) + } + return +} + +func (tr tsReader) ReadAt(b []byte, off int64) (n int, err error) { + ts := tr.tf.ts + ts.countRead(tr.tf.Type()) + n, err = tr.Reader.ReadAt(b, off) + if err != nil && err != io.EOF { + ts.t.Errorf("E: readAt error, num=%d type=%v off=%d n=%d: %v", tr.tf.Num(), tr.tf.Type(), off, n, err) + } + return +} + +func (tr tsReader) Close() (err error) { + err = tr.Reader.Close() + tr.tf.close("reader", err) + return +} + +type tsWriter struct { + tf tsFile + storage.Writer +} + +func (tw tsWriter) Write(b []byte) (n int, err error) { + ts := tw.tf.ts + ts.mu.Lock() + defer ts.mu.Unlock() + if ts.emuWriteErr&tw.tf.Type() != 0 { + return 0, errors.New("leveldb.testStorage: emulated write error") + } + n, err = tw.Writer.Write(b) + if err != nil { + ts.t.Errorf("E: write error, num=%d type=%v n=%d: %v", tw.tf.Num(), tw.tf.Type(), n, err) + } + return +} + +func (tw tsWriter) Sync() (err error) { + ts := tw.tf.ts + ts.mu.Lock() + defer ts.mu.Unlock() + for ts.emuDelaySync&tw.tf.Type() != 0 { + ts.cond.Wait() + } + if ts.emuSyncErr&tw.tf.Type() != 0 { + return errors.New("leveldb.testStorage: emulated sync error") + } + err = tw.Writer.Sync() + if err != nil { + ts.t.Errorf("E: sync error, num=%d type=%v: %v", tw.tf.Num(), tw.tf.Type(), err) + } + return +} + +func (tw tsWriter) Close() (err error) { + err = tw.Writer.Close() + tw.tf.close("reader", err) + return +} + +type tsFile struct { + ts *testStorage + storage.File +} + +func (tf tsFile) x() uint64 { + return tf.Num()<<typeShift | uint64(tf.Type()) +} + +func (tf tsFile) checkOpen(m string) error { + ts := tf.ts + if writer, ok := ts.opens[tf.x()]; ok { + if writer { + ts.t.Errorf("E: cannot %s file, num=%d type=%v: a writer still open", m, tf.Num(), tf.Type()) + } else { + ts.t.Errorf("E: cannot %s file, num=%d type=%v: a reader still open", m, tf.Num(), tf.Type()) + } + return tsErrFileOpen + } + return nil +} + +func (tf tsFile) close(m string, err error) { + ts := tf.ts + ts.mu.Lock() + defer ts.mu.Unlock() + if _, ok := ts.opens[tf.x()]; !ok { + ts.t.Errorf("E: %s: redudant file closing, num=%d type=%v", m, tf.Num(), tf.Type()) + } else if err == nil { + ts.t.Logf("I: %s: file closed, num=%d type=%v", m, tf.Num(), tf.Type()) + } + delete(ts.opens, tf.x()) + if err != nil { + ts.t.Errorf("E: %s: cannot close file, num=%d type=%v: %v", m, tf.Num(), tf.Type(), err) + } +} + +func (tf tsFile) Open() (r storage.Reader, err error) { + ts := tf.ts + ts.mu.Lock() + defer ts.mu.Unlock() + err = tf.checkOpen("open") + if err != nil { + return + } + if ts.emuOpenErr&tf.Type() != 0 { + err = errors.New("leveldb.testStorage: emulated open error") + return + } + r, err = tf.File.Open() + if err != nil { + if ts.ignoreOpenErr&tf.Type() != 0 { + ts.t.Logf("I: cannot open file, num=%d type=%v: %v (ignored)", tf.Num(), tf.Type(), err) + } else { + ts.t.Errorf("E: cannot open file, num=%d type=%v: %v", tf.Num(), tf.Type(), err) + } + } else { + ts.t.Logf("I: file opened, num=%d type=%v", tf.Num(), tf.Type()) + ts.opens[tf.x()] = false + r = tsReader{tf, r} + } + return +} + +func (tf tsFile) Create() (w storage.Writer, err error) { + ts := tf.ts + ts.mu.Lock() + defer ts.mu.Unlock() + err = tf.checkOpen("create") + if err != nil { + return + } + if ts.emuCreateErr&tf.Type() != 0 { + err = errors.New("leveldb.testStorage: emulated create error") + return + } + w, err = tf.File.Create() + if err != nil { + ts.t.Errorf("E: cannot create file, num=%d type=%v: %v", tf.Num(), tf.Type(), err) + } else { + ts.t.Logf("I: file created, num=%d type=%v", tf.Num(), tf.Type()) + ts.opens[tf.x()] = true + w = tsWriter{tf, w} + } + return +} + +func (tf tsFile) Remove() (err error) { + ts := tf.ts + ts.mu.Lock() + defer ts.mu.Unlock() + err = tf.checkOpen("remove") + if err != nil { + return + } + err = tf.File.Remove() + if err != nil { + ts.t.Errorf("E: cannot remove file, num=%d type=%v: %v", tf.Num(), tf.Type(), err) + } else { + ts.t.Logf("I: file removed, num=%d type=%v", tf.Num(), tf.Type()) + } + return +} + +type testStorage struct { + t *testing.T + storage.Storage + closeFn func() error + + mu sync.Mutex + cond sync.Cond + // Open files, true=writer, false=reader + opens map[uint64]bool + emuOpenErr storage.FileType + emuCreateErr storage.FileType + emuDelaySync storage.FileType + emuWriteErr storage.FileType + emuSyncErr storage.FileType + ignoreOpenErr storage.FileType + readCnt uint64 + readCntEn storage.FileType +} + +func (ts *testStorage) SetOpenErr(t storage.FileType) { + ts.mu.Lock() + ts.emuOpenErr = t + ts.mu.Unlock() +} + +func (ts *testStorage) SetCreateErr(t storage.FileType) { + ts.mu.Lock() + ts.emuCreateErr = t + ts.mu.Unlock() +} + +func (ts *testStorage) DelaySync(t storage.FileType) { + ts.mu.Lock() + ts.emuDelaySync |= t + ts.cond.Broadcast() + ts.mu.Unlock() +} + +func (ts *testStorage) ReleaseSync(t storage.FileType) { + ts.mu.Lock() + ts.emuDelaySync &= ^t + ts.cond.Broadcast() + ts.mu.Unlock() +} + +func (ts *testStorage) SetWriteErr(t storage.FileType) { + ts.mu.Lock() + ts.emuWriteErr = t + ts.mu.Unlock() +} + +func (ts *testStorage) SetSyncErr(t storage.FileType) { + ts.mu.Lock() + ts.emuSyncErr = t + ts.mu.Unlock() +} + +func (ts *testStorage) ReadCounter() uint64 { + ts.mu.Lock() + defer ts.mu.Unlock() + return ts.readCnt +} + +func (ts *testStorage) ResetReadCounter() { + ts.mu.Lock() + ts.readCnt = 0 + ts.mu.Unlock() +} + +func (ts *testStorage) SetReadCounter(t storage.FileType) { + ts.mu.Lock() + ts.readCntEn = t + ts.mu.Unlock() +} + +func (ts *testStorage) countRead(t storage.FileType) { + ts.mu.Lock() + if ts.readCntEn&t != 0 { + ts.readCnt++ + } + ts.mu.Unlock() +} + +func (ts *testStorage) SetIgnoreOpenErr(t storage.FileType) { + ts.ignoreOpenErr = t +} + +func (ts *testStorage) Lock() (r util.Releaser, err error) { + r, err = ts.Storage.Lock() + if err != nil { + ts.t.Logf("W: storage locking failed: %v", err) + } else { + ts.t.Log("I: storage locked") + r = tsLock{ts, r} + } + return +} + +func (ts *testStorage) Log(str string) { + ts.t.Log("L: " + str) + ts.Storage.Log(str) +} + +func (ts *testStorage) GetFile(num uint64, t storage.FileType) storage.File { + return tsFile{ts, ts.Storage.GetFile(num, t)} +} + +func (ts *testStorage) GetFiles(t storage.FileType) (ff []storage.File, err error) { + ff0, err := ts.Storage.GetFiles(t) + if err != nil { + ts.t.Errorf("E: get files failed: %v", err) + return + } + ff = make([]storage.File, len(ff0)) + for i, f := range ff0 { + ff[i] = tsFile{ts, f} + } + ts.t.Logf("I: get files, type=0x%x count=%d", int(t), len(ff)) + return +} + +func (ts *testStorage) GetManifest() (f storage.File, err error) { + f0, err := ts.Storage.GetManifest() + if err != nil { + if !os.IsNotExist(err) { + ts.t.Errorf("E: get manifest failed: %v", err) + } + return + } + f = tsFile{ts, f0} + ts.t.Logf("I: get manifest, num=%d", f.Num()) + return +} + +func (ts *testStorage) SetManifest(f storage.File) error { + tf, ok := f.(tsFile) + if !ok { + ts.t.Error("E: set manifest failed: type assertion failed") + return tsErrInvalidFile + } else if tf.Type() != storage.TypeManifest { + ts.t.Errorf("E: set manifest failed: invalid file type: %s", tf.Type()) + return tsErrInvalidFile + } + err := ts.Storage.SetManifest(tf.File) + if err != nil { + ts.t.Errorf("E: set manifest failed: %v", err) + } else { + ts.t.Logf("I: set manifest, num=%d", tf.Num()) + } + return err +} + +func (ts *testStorage) Close() error { + ts.CloseCheck() + err := ts.Storage.Close() + if err != nil { + ts.t.Errorf("E: closing storage failed: %v", err) + } else { + ts.t.Log("I: storage closed") + } + if ts.closeFn != nil { + if err := ts.closeFn(); err != nil { + ts.t.Errorf("E: close function: %v", err) + } + } + return err +} + +func (ts *testStorage) CloseCheck() { + ts.mu.Lock() + if len(ts.opens) == 0 { + ts.t.Log("I: all files are closed") + } else { + ts.t.Errorf("E: %d files still open", len(ts.opens)) + for x, writer := range ts.opens { + num, tt := x>>typeShift, storage.FileType(x)&storage.TypeAll + ts.t.Errorf("E: * num=%d type=%v writer=%v", num, tt, writer) + } + } + ts.mu.Unlock() +} + +func newTestStorage(t *testing.T) *testStorage { + var stor storage.Storage + var closeFn func() error + if tsFS { + for { + tsMU.Lock() + num := tsNum + tsNum++ + tsMU.Unlock() + path := filepath.Join(os.TempDir(), fmt.Sprintf("goleveldb-test%d0%d0%d", os.Getuid(), os.Getpid(), num)) + if _, err := os.Stat(path); err != nil { + stor, err = storage.OpenFile(path) + if err != nil { + t.Fatalf("F: cannot create storage: %v", err) + } + t.Logf("I: storage created: %s", path) + closeFn = func() error { + for _, name := range []string{"LOG.old", "LOG"} { + f, err := os.Open(filepath.Join(path, name)) + if err != nil { + continue + } + if log, err := ioutil.ReadAll(f); err != nil { + t.Logf("---------------------- %s ----------------------", name) + t.Logf("cannot read log: %v", err) + t.Logf("---------------------- %s ----------------------", name) + } else if len(log) > 0 { + t.Logf("---------------------- %s ----------------------\n%s", name, string(log)) + t.Logf("---------------------- %s ----------------------", name) + } + f.Close() + } + if tsKeepFS { + return nil + } + return os.RemoveAll(path) + } + + break + } + } + } else { + stor = storage.NewMemStorage() + } + ts := &testStorage{ + t: t, + Storage: stor, + closeFn: closeFn, + opens: make(map[uint64]bool), + } + ts.cond.L = &ts.mu + return ts +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table.go new file mode 100644 index 000000000..fdd5d2bcf --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table.go @@ -0,0 +1,424 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "sort" + "sync/atomic" + + "github.com/syndtr/goleveldb/leveldb/cache" + "github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" + "github.com/syndtr/goleveldb/leveldb/table" + "github.com/syndtr/goleveldb/leveldb/util" +) + +// table file +type tFile struct { + file storage.File + seekLeft int32 + size uint64 + min, max iKey +} + +// test if key is after t +func (t *tFile) isAfter(key []byte, ucmp comparer.BasicComparer) bool { + return key != nil && ucmp.Compare(key, t.max.ukey()) > 0 +} + +// test if key is before t +func (t *tFile) isBefore(key []byte, ucmp comparer.BasicComparer) bool { + return key != nil && ucmp.Compare(key, t.min.ukey()) < 0 +} + +func (t *tFile) incrSeek() int32 { + return atomic.AddInt32(&t.seekLeft, -1) +} + +func newTFile(file storage.File, size uint64, min, max iKey) *tFile { + f := &tFile{ + file: file, + size: size, + min: min, + max: max, + } + + // We arrange to automatically compact this file after + // a certain number of seeks. Let's assume: + // (1) One seek costs 10ms + // (2) Writing or reading 1MB costs 10ms (100MB/s) + // (3) A compaction of 1MB does 25MB of IO: + // 1MB read from this level + // 10-12MB read from next level (boundaries may be misaligned) + // 10-12MB written to next level + // This implies that 25 seeks cost the same as the compaction + // of 1MB of data. I.e., one seek costs approximately the + // same as the compaction of 40KB of data. We are a little + // conservative and allow approximately one seek for every 16KB + // of data before triggering a compaction. + f.seekLeft = int32(size / 16384) + if f.seekLeft < 100 { + f.seekLeft = 100 + } + + return f +} + +// table files +type tFiles []*tFile + +func (tf tFiles) Len() int { return len(tf) } +func (tf tFiles) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] } + +func (tf tFiles) lessByKey(icmp *iComparer, i, j int) bool { + a, b := tf[i], tf[j] + n := icmp.Compare(a.min, b.min) + if n == 0 { + return a.file.Num() < b.file.Num() + } + return n < 0 +} + +func (tf tFiles) lessByNum(i, j int) bool { + return tf[i].file.Num() > tf[j].file.Num() +} + +func (tf tFiles) sortByKey(icmp *iComparer) { + sort.Sort(&tFilesSortByKey{tFiles: tf, icmp: icmp}) +} + +func (tf tFiles) sortByNum() { + sort.Sort(&tFilesSortByNum{tFiles: tf}) +} + +func (tf tFiles) size() (sum uint64) { + for _, t := range tf { + sum += t.size + } + return sum +} + +func (tf tFiles) searchMin(key iKey, icmp *iComparer) int { + return sort.Search(len(tf), func(i int) bool { + return icmp.Compare(tf[i].min, key) >= 0 + }) +} + +func (tf tFiles) searchMax(key iKey, icmp *iComparer) int { + return sort.Search(len(tf), func(i int) bool { + return icmp.Compare(tf[i].max, key) >= 0 + }) +} + +func (tf tFiles) isOverlaps(min, max []byte, disjSorted bool, icmp *iComparer) bool { + if !disjSorted { + // Need to check against all files + for _, t := range tf { + if !t.isAfter(min, icmp.ucmp) && !t.isBefore(max, icmp.ucmp) { + return true + } + } + return false + } + + var idx int + if len(min) > 0 { + // Find the earliest possible internal key for min + idx = tf.searchMax(newIKey(min, kMaxSeq, tSeek), icmp) + } + + if idx >= len(tf) { + // beginning of range is after all files, so no overlap + return false + } + return !tf[idx].isBefore(max, icmp.ucmp) +} + +func (tf tFiles) getOverlaps(min, max []byte, r *tFiles, disjSorted bool, ucmp comparer.BasicComparer) { + for i := 0; i < len(tf); { + t := tf[i] + i++ + if t.isAfter(min, ucmp) || t.isBefore(max, ucmp) { + continue + } + + *r = append(*r, t) + if !disjSorted { + // Level-0 files may overlap each other. So check if the newly + // added file has expanded the range. If so, restart search. + if min != nil && ucmp.Compare(t.min.ukey(), min) < 0 { + min = t.min.ukey() + *r = nil + i = 0 + } else if max != nil && ucmp.Compare(t.max.ukey(), max) > 0 { + max = t.max.ukey() + *r = nil + i = 0 + } + } + } + + return +} + +func (tf tFiles) getRange(icmp *iComparer) (min, max iKey) { + for i, t := range tf { + if i == 0 { + min, max = t.min, t.max + continue + } + if icmp.Compare(t.min, min) < 0 { + min = t.min + } + if icmp.Compare(t.max, max) > 0 { + max = t.max + } + } + + return +} + +func (tf tFiles) newIndexIterator(tops *tOps, icmp *iComparer, slice *util.Range, ro *opt.ReadOptions) iterator.IteratorIndexer { + if slice != nil { + var start, limit int + if slice.Start != nil { + start = tf.searchMax(iKey(slice.Start), icmp) + } + if slice.Limit != nil { + limit = tf.searchMin(iKey(slice.Limit), icmp) + } else { + limit = tf.Len() + } + tf = tf[start:limit] + } + return iterator.NewArrayIndexer(&tFilesArrayIndexer{ + tFiles: tf, + tops: tops, + icmp: icmp, + slice: slice, + ro: ro, + }) +} + +type tFilesArrayIndexer struct { + tFiles + tops *tOps + icmp *iComparer + slice *util.Range + ro *opt.ReadOptions +} + +func (a *tFilesArrayIndexer) Search(key []byte) int { + return a.searchMax(iKey(key), a.icmp) +} + +func (a *tFilesArrayIndexer) Get(i int) iterator.Iterator { + if i == 0 || i == a.Len()-1 { + return a.tops.newIterator(a.tFiles[i], a.slice, a.ro) + } + return a.tops.newIterator(a.tFiles[i], nil, a.ro) +} + +type tFilesSortByKey struct { + tFiles + icmp *iComparer +} + +func (x *tFilesSortByKey) Less(i, j int) bool { + return x.lessByKey(x.icmp, i, j) +} + +type tFilesSortByNum struct { + tFiles +} + +func (x *tFilesSortByNum) Less(i, j int) bool { + return x.lessByNum(i, j) +} + +// table operations +type tOps struct { + s *session + cache cache.Cache + cacheNS cache.Namespace +} + +func newTableOps(s *session, cacheCap int) *tOps { + c := cache.NewLRUCache(cacheCap) + ns := c.GetNamespace(0) + return &tOps{s, c, ns} +} + +func (t *tOps) create() (*tWriter, error) { + file := t.s.getTableFile(t.s.allocFileNum()) + fw, err := file.Create() + if err != nil { + return nil, err + } + return &tWriter{ + t: t, + file: file, + w: fw, + tw: table.NewWriter(fw, t.s.o), + }, nil +} + +func (t *tOps) createFrom(src iterator.Iterator) (f *tFile, n int, err error) { + w, err := t.create() + if err != nil { + return f, n, err + } + + defer func() { + if err != nil { + w.drop() + } + }() + + for src.Next() { + err = w.add(src.Key(), src.Value()) + if err != nil { + return + } + } + err = src.Error() + if err != nil { + return + } + + n = w.tw.EntriesLen() + f, err = w.finish() + return +} + +func (t *tOps) lookup(f *tFile) (c cache.Object, err error) { + num := f.file.Num() + c, ok := t.cacheNS.Get(num, func() (ok bool, value interface{}, charge int, fin cache.SetFin) { + var r storage.Reader + r, err = f.file.Open() + if err != nil { + return + } + + o := t.s.o + + var cacheNS cache.Namespace + if bc := o.GetBlockCache(); bc != nil { + cacheNS = bc.GetNamespace(num) + } + + ok = true + value = table.NewReader(r, int64(f.size), cacheNS, o) + charge = 1 + fin = func() { + r.Close() + } + return + }) + if !ok && err == nil { + err = ErrClosed + } + return +} + +func (t *tOps) get(f *tFile, key []byte, ro *opt.ReadOptions) (rkey, rvalue []byte, err error) { + c, err := t.lookup(f) + if err != nil { + return nil, nil, err + } + defer c.Release() + return c.Value().(*table.Reader).Find(key, ro) +} + +func (t *tOps) offsetOf(f *tFile, key []byte) (offset uint64, err error) { + c, err := t.lookup(f) + if err != nil { + return + } + _offset, err := c.Value().(*table.Reader).OffsetOf(key) + offset = uint64(_offset) + c.Release() + return +} + +func (t *tOps) newIterator(f *tFile, slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { + c, err := t.lookup(f) + if err != nil { + return iterator.NewEmptyIterator(err) + } + iter := c.Value().(*table.Reader).NewIterator(slice, ro) + iter.SetReleaser(c) + return iter +} + +func (t *tOps) remove(f *tFile) { + num := f.file.Num() + t.cacheNS.Delete(num, func(exist bool) { + if err := f.file.Remove(); err != nil { + t.s.logf("table@remove removing @%d %q", num, err) + } else { + t.s.logf("table@remove removed @%d", num) + } + if bc := t.s.o.GetBlockCache(); bc != nil { + bc.GetNamespace(num).Zap(false) + } + }) +} + +func (t *tOps) close() { + t.cache.Zap(true) +} + +type tWriter struct { + t *tOps + + file storage.File + w storage.Writer + tw *table.Writer + + first, last []byte +} + +func (w *tWriter) add(key, value []byte) error { + if w.first == nil { + w.first = append([]byte{}, key...) + } + w.last = append(w.last[:0], key...) + return w.tw.Append(key, value) +} + +func (w *tWriter) empty() bool { + return w.first == nil +} + +func (w *tWriter) finish() (f *tFile, err error) { + err = w.tw.Close() + if err != nil { + return + } + err = w.w.Sync() + if err != nil { + w.w.Close() + return + } + w.w.Close() + f = newTFile(w.file, uint64(w.tw.BytesLen()), iKey(w.first), iKey(w.last)) + return +} + +func (w *tWriter) drop() { + w.w.Close() + w.file.Remove() + w.t.s.reuseFileNum(w.file.Num()) + w.w = nil + w.file = nil + w.tw = nil + w.first = nil + w.last = nil +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/block_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/block_test.go new file mode 100644 index 000000000..ca598f4f5 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/block_test.go @@ -0,0 +1,131 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package table + +import ( + "encoding/binary" + "fmt" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + + "github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/testutil" + "github.com/syndtr/goleveldb/leveldb/util" +) + +func (b *block) TestNewIterator(slice *util.Range) iterator.Iterator { + return b.newIterator(slice, false, nil) +} + +var _ = testutil.Defer(func() { + Describe("Block", func() { + Build := func(kv *testutil.KeyValue, restartInterval int) *block { + // Building the block. + bw := &blockWriter{ + restartInterval: restartInterval, + scratch: make([]byte, 30), + } + kv.Iterate(func(i int, key, value []byte) { + bw.append(key, value) + }) + bw.finish() + + // Opening the block. + data := bw.buf.Bytes() + restartsLen := int(binary.LittleEndian.Uint32(data[len(data)-4:])) + return &block{ + cmp: comparer.DefaultComparer, + data: data, + restartsLen: restartsLen, + restartsOffset: len(data) - (restartsLen+1)*4, + } + } + + Describe("read test", func() { + for restartInterval := 1; restartInterval <= 5; restartInterval++ { + Describe(fmt.Sprintf("with restart interval of %d", restartInterval), func() { + kv := &testutil.KeyValue{} + Text := func() string { + return fmt.Sprintf("and %d keys", kv.Len()) + } + + Test := func() { + // Make block. + br := Build(kv, restartInterval) + // Do testing. + testutil.KeyValueTesting(nil, br, kv.Clone()) + } + + Describe(Text(), Test) + + kv.PutString("", "empty") + Describe(Text(), Test) + + kv.PutString("a1", "foo") + Describe(Text(), Test) + + kv.PutString("a2", "v") + Describe(Text(), Test) + + kv.PutString("a3qqwrkks", "hello") + Describe(Text(), Test) + + kv.PutString("a4", "bar") + Describe(Text(), Test) + + kv.PutString("a5111111", "v5") + kv.PutString("a6", "") + kv.PutString("a7", "v7") + kv.PutString("a8", "vvvvvvvvvvvvvvvvvvvvvv8") + kv.PutString("b", "v9") + kv.PutString("c9", "v9") + kv.PutString("c91", "v9") + kv.PutString("d0", "v9") + Describe(Text(), Test) + }) + } + }) + + Describe("out-of-bound slice test", func() { + kv := &testutil.KeyValue{} + kv.PutString("k1", "v1") + kv.PutString("k2", "v2") + kv.PutString("k3abcdefgg", "v3") + kv.PutString("k4", "v4") + kv.PutString("k5", "v5") + for restartInterval := 1; restartInterval <= 5; restartInterval++ { + Describe(fmt.Sprintf("with restart interval of %d", restartInterval), func() { + // Make block. + br := Build(kv, restartInterval) + + Test := func(r *util.Range) func(done Done) { + return func(done Done) { + iter := br.newIterator(r, false, nil) + Expect(iter.Error()).ShouldNot(HaveOccurred()) + + t := testutil.IteratorTesting{ + KeyValue: kv.Clone(), + Iter: iter, + } + + testutil.DoIteratorTesting(&t) + done <- true + } + } + + It("Should do iterations and seeks correctly #0", + Test(&util.Range{Start: []byte("k0"), Limit: []byte("k6")}), 2.0) + + It("Should do iterations and seeks correctly #1", + Test(&util.Range{Start: []byte(""), Limit: []byte("zzzzzzz")}), 2.0) + }) + } + }) + }) +}) diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/reader.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/reader.go new file mode 100644 index 000000000..8acb9f720 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/reader.go @@ -0,0 +1,848 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package table + +import ( + "encoding/binary" + "errors" + "fmt" + "io" + "sort" + "strings" + + "code.google.com/p/snappy-go/snappy" + + "github.com/syndtr/goleveldb/leveldb/cache" + "github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/syndtr/goleveldb/leveldb/filter" + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/util" +) + +var ( + ErrNotFound = util.ErrNotFound + ErrIterReleased = errors.New("leveldb/table: iterator released") +) + +func max(x, y int) int { + if x > y { + return x + } + return y +} + +type block struct { + cmp comparer.BasicComparer + data []byte + restartsLen int + restartsOffset int + // Whether checksum is verified and valid. + checksum bool +} + +func (b *block) seek(rstart, rlimit int, key []byte) (index, offset int, err error) { + n := b.restartsOffset + data := b.data + cmp := b.cmp + + index = sort.Search(b.restartsLen-rstart-(b.restartsLen-rlimit), func(i int) bool { + offset := int(binary.LittleEndian.Uint32(data[n+4*(rstart+i):])) + offset += 1 // shared always zero, since this is a restart point + v1, n1 := binary.Uvarint(data[offset:]) // key length + _, n2 := binary.Uvarint(data[offset+n1:]) // value length + m := offset + n1 + n2 + return cmp.Compare(data[m:m+int(v1)], key) > 0 + }) + rstart - 1 + if index < rstart { + // The smallest key is greater-than key sought. + index = rstart + } + offset = int(binary.LittleEndian.Uint32(data[n+4*index:])) + return +} + +func (b *block) restartIndex(rstart, rlimit, offset int) int { + n := b.restartsOffset + data := b.data + return sort.Search(b.restartsLen-rstart-(b.restartsLen-rlimit), func(i int) bool { + return int(binary.LittleEndian.Uint32(data[n+4*(rstart+i):])) > offset + }) + rstart - 1 +} + +func (b *block) restartOffset(index int) int { + return int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*index:])) +} + +func (b *block) entry(offset int) (key, value []byte, nShared, n int, err error) { + if offset >= b.restartsOffset { + if offset != b.restartsOffset { + err = errors.New("leveldb/table: Reader: BlockEntry: invalid block (block entries offset not aligned)") + } + return + } + v0, n0 := binary.Uvarint(b.data[offset:]) // Shared prefix length + v1, n1 := binary.Uvarint(b.data[offset+n0:]) // Key length + v2, n2 := binary.Uvarint(b.data[offset+n0+n1:]) // Value length + m := n0 + n1 + n2 + n = m + int(v1) + int(v2) + if n0 <= 0 || n1 <= 0 || n2 <= 0 || offset+n > b.restartsOffset { + err = errors.New("leveldb/table: Reader: invalid block (block entries corrupted)") + return + } + key = b.data[offset+m : offset+m+int(v1)] + value = b.data[offset+m+int(v1) : offset+n] + nShared = int(v0) + return +} + +func (b *block) newIterator(slice *util.Range, inclLimit bool, cache util.Releaser) *blockIter { + bi := &blockIter{ + block: b, + cache: cache, + // Valid key should never be nil. + key: make([]byte, 0), + dir: dirSOI, + riStart: 0, + riLimit: b.restartsLen, + offsetStart: 0, + offsetRealStart: 0, + offsetLimit: b.restartsOffset, + } + if slice != nil { + if slice.Start != nil { + if bi.Seek(slice.Start) { + bi.riStart = b.restartIndex(bi.restartIndex, b.restartsLen, bi.prevOffset) + bi.offsetStart = b.restartOffset(bi.riStart) + bi.offsetRealStart = bi.prevOffset + } else { + bi.riStart = b.restartsLen + bi.offsetStart = b.restartsOffset + bi.offsetRealStart = b.restartsOffset + } + } + if slice.Limit != nil { + if bi.Seek(slice.Limit) && (!inclLimit || bi.Next()) { + bi.offsetLimit = bi.prevOffset + bi.riLimit = bi.restartIndex + 1 + } + } + bi.reset() + if bi.offsetStart > bi.offsetLimit { + bi.sErr(errors.New("leveldb/table: Reader: invalid slice range")) + } + } + return bi +} + +type dir int + +const ( + dirReleased dir = iota - 1 + dirSOI + dirEOI + dirBackward + dirForward +) + +type blockIter struct { + block *block + cache, releaser util.Releaser + key, value []byte + offset int + // Previous offset, only filled by Next. + prevOffset int + prevNode []int + prevKeys []byte + restartIndex int + // Iterator direction. + dir dir + // Restart index slice range. + riStart int + riLimit int + // Offset slice range. + offsetStart int + offsetRealStart int + offsetLimit int + // Error. + err error +} + +func (i *blockIter) sErr(err error) { + i.err = err + i.key = nil + i.value = nil + i.prevNode = nil + i.prevKeys = nil +} + +func (i *blockIter) reset() { + if i.dir == dirBackward { + i.prevNode = i.prevNode[:0] + i.prevKeys = i.prevKeys[:0] + } + i.restartIndex = i.riStart + i.offset = i.offsetStart + i.dir = dirSOI + i.key = i.key[:0] + i.value = nil +} + +func (i *blockIter) isFirst() bool { + switch i.dir { + case dirForward: + return i.prevOffset == i.offsetRealStart + case dirBackward: + return len(i.prevNode) == 1 && i.restartIndex == i.riStart + } + return false +} + +func (i *blockIter) isLast() bool { + switch i.dir { + case dirForward, dirBackward: + return i.offset == i.offsetLimit + } + return false +} + +func (i *blockIter) First() bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + if i.dir == dirBackward { + i.prevNode = i.prevNode[:0] + i.prevKeys = i.prevKeys[:0] + } + i.dir = dirSOI + return i.Next() +} + +func (i *blockIter) Last() bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + if i.dir == dirBackward { + i.prevNode = i.prevNode[:0] + i.prevKeys = i.prevKeys[:0] + } + i.dir = dirEOI + return i.Prev() +} + +func (i *blockIter) Seek(key []byte) bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + ri, offset, err := i.block.seek(i.riStart, i.riLimit, key) + if err != nil { + i.sErr(err) + return false + } + i.restartIndex = ri + i.offset = max(i.offsetStart, offset) + if i.dir == dirSOI || i.dir == dirEOI { + i.dir = dirForward + } + for i.Next() { + if i.block.cmp.Compare(i.key, key) >= 0 { + return true + } + } + return false +} + +func (i *blockIter) Next() bool { + if i.dir == dirEOI || i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + if i.dir == dirSOI { + i.restartIndex = i.riStart + i.offset = i.offsetStart + } else if i.dir == dirBackward { + i.prevNode = i.prevNode[:0] + i.prevKeys = i.prevKeys[:0] + } + for i.offset < i.offsetRealStart { + key, value, nShared, n, err := i.block.entry(i.offset) + if err != nil { + i.sErr(err) + return false + } + if n == 0 { + i.dir = dirEOI + return false + } + i.key = append(i.key[:nShared], key...) + i.value = value + i.offset += n + } + if i.offset >= i.offsetLimit { + i.dir = dirEOI + if i.offset != i.offsetLimit { + i.sErr(errors.New("leveldb/table: Reader: Next: invalid block (block entries offset not aligned)")) + } + return false + } + key, value, nShared, n, err := i.block.entry(i.offset) + if err != nil { + i.sErr(err) + return false + } + if n == 0 { + i.dir = dirEOI + return false + } + i.key = append(i.key[:nShared], key...) + i.value = value + i.prevOffset = i.offset + i.offset += n + i.dir = dirForward + return true +} + +func (i *blockIter) Prev() bool { + if i.dir == dirSOI || i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + var ri int + if i.dir == dirForward { + // Change direction. + i.offset = i.prevOffset + if i.offset == i.offsetRealStart { + i.dir = dirSOI + return false + } + ri = i.block.restartIndex(i.restartIndex, i.riLimit, i.offset) + i.dir = dirBackward + } else if i.dir == dirEOI { + // At the end of iterator. + i.restartIndex = i.riLimit + i.offset = i.offsetLimit + if i.offset == i.offsetRealStart { + i.dir = dirSOI + return false + } + ri = i.riLimit - 1 + i.dir = dirBackward + } else if len(i.prevNode) == 1 { + // This is the end of a restart range. + i.offset = i.prevNode[0] + i.prevNode = i.prevNode[:0] + if i.restartIndex == i.riStart { + i.dir = dirSOI + return false + } + i.restartIndex-- + ri = i.restartIndex + } else { + // In the middle of restart range, get from cache. + n := len(i.prevNode) - 3 + node := i.prevNode[n:] + i.prevNode = i.prevNode[:n] + // Get the key. + ko := node[0] + i.key = append(i.key[:0], i.prevKeys[ko:]...) + i.prevKeys = i.prevKeys[:ko] + // Get the value. + vo := node[1] + vl := vo + node[2] + i.value = i.block.data[vo:vl] + i.offset = vl + return true + } + // Build entries cache. + i.key = i.key[:0] + i.value = nil + offset := i.block.restartOffset(ri) + if offset == i.offset { + ri -= 1 + if ri < 0 { + i.dir = dirSOI + return false + } + offset = i.block.restartOffset(ri) + } + i.prevNode = append(i.prevNode, offset) + for { + key, value, nShared, n, err := i.block.entry(offset) + if err != nil { + i.sErr(err) + return false + } + if offset >= i.offsetRealStart { + if i.value != nil { + // Appends 3 variables: + // 1. Previous keys offset + // 2. Value offset in the data block + // 3. Value length + i.prevNode = append(i.prevNode, len(i.prevKeys), offset-len(i.value), len(i.value)) + i.prevKeys = append(i.prevKeys, i.key...) + } + i.value = value + } + i.key = append(i.key[:nShared], key...) + offset += n + // Stop if target offset reached. + if offset >= i.offset { + if offset != i.offset { + i.sErr(errors.New("leveldb/table: Reader: Prev: invalid block (block entries offset not aligned)")) + return false + } + + break + } + } + i.restartIndex = ri + i.offset = offset + return true +} + +func (i *blockIter) Key() []byte { + if i.err != nil || i.dir <= dirEOI { + return nil + } + return i.key +} + +func (i *blockIter) Value() []byte { + if i.err != nil || i.dir <= dirEOI { + return nil + } + return i.value +} + +func (i *blockIter) Release() { + i.prevNode = nil + i.prevKeys = nil + i.key = nil + i.value = nil + i.dir = dirReleased + if i.cache != nil { + i.cache.Release() + i.cache = nil + } + if i.releaser != nil { + i.releaser.Release() + i.releaser = nil + } +} + +func (i *blockIter) SetReleaser(releaser util.Releaser) { + if i.dir > dirReleased { + i.releaser = releaser + } +} + +func (i *blockIter) Valid() bool { + return i.err == nil && (i.dir == dirBackward || i.dir == dirForward) +} + +func (i *blockIter) Error() error { + return i.err +} + +type filterBlock struct { + filter filter.Filter + data []byte + oOffset int + baseLg uint + filtersNum int +} + +func (b *filterBlock) contains(offset uint64, key []byte) bool { + i := int(offset >> b.baseLg) + if i < b.filtersNum { + o := b.data[b.oOffset+i*4:] + n := int(binary.LittleEndian.Uint32(o)) + m := int(binary.LittleEndian.Uint32(o[4:])) + if n < m && m <= b.oOffset { + return b.filter.Contains(b.data[n:m], key) + } else if n == m { + return false + } + } + return true +} + +type indexIter struct { + blockIter + tableReader *Reader + slice *util.Range + // Options + checksum bool + fillCache bool +} + +func (i *indexIter) Get() iterator.Iterator { + value := i.Value() + if value == nil { + return nil + } + dataBH, n := decodeBlockHandle(value) + if n == 0 { + return iterator.NewEmptyIterator(errors.New("leveldb/table: Reader: invalid table (bad data block handle)")) + } + var slice *util.Range + if i.slice != nil && (i.blockIter.isFirst() || i.blockIter.isLast()) { + slice = i.slice + } + return i.tableReader.getDataIter(dataBH, slice, i.checksum, i.fillCache) +} + +// Reader is a table reader. +type Reader struct { + reader io.ReaderAt + cache cache.Namespace + err error + // Options + cmp comparer.Comparer + filter filter.Filter + checksum bool + strictIter bool + + dataEnd int64 + indexBlock *block + filterBlock *filterBlock +} + +func verifyChecksum(data []byte) bool { + n := len(data) - 4 + checksum0 := binary.LittleEndian.Uint32(data[n:]) + checksum1 := util.NewCRC(data[:n]).Value() + return checksum0 == checksum1 +} + +func (r *Reader) readRawBlock(bh blockHandle, checksum bool) ([]byte, error) { + data := make([]byte, bh.length+blockTrailerLen) + if _, err := r.reader.ReadAt(data, int64(bh.offset)); err != nil && err != io.EOF { + return nil, err + } + if checksum || r.checksum { + if !verifyChecksum(data) { + return nil, errors.New("leveldb/table: Reader: invalid block (checksum mismatch)") + } + } + switch data[bh.length] { + case blockTypeNoCompression: + data = data[:bh.length] + case blockTypeSnappyCompression: + var err error + data, err = snappy.Decode(nil, data[:bh.length]) + if err != nil { + return nil, err + } + default: + return nil, fmt.Errorf("leveldb/table: Reader: unknown block compression type: %d", data[bh.length]) + } + return data, nil +} + +func (r *Reader) readBlock(bh blockHandle, checksum bool) (*block, error) { + data, err := r.readRawBlock(bh, checksum) + if err != nil { + return nil, err + } + restartsLen := int(binary.LittleEndian.Uint32(data[len(data)-4:])) + b := &block{ + cmp: r.cmp, + data: data, + restartsLen: restartsLen, + restartsOffset: len(data) - (restartsLen+1)*4, + checksum: checksum || r.checksum, + } + return b, nil +} + +func (r *Reader) readFilterBlock(bh blockHandle, filter filter.Filter) (*filterBlock, error) { + data, err := r.readRawBlock(bh, true) + if err != nil { + return nil, err + } + n := len(data) + if n < 5 { + return nil, errors.New("leveldb/table: Reader: invalid filter block (too short)") + } + m := n - 5 + oOffset := int(binary.LittleEndian.Uint32(data[m:])) + if oOffset > m { + return nil, errors.New("leveldb/table: Reader: invalid filter block (invalid offset)") + } + b := &filterBlock{ + filter: filter, + data: data, + oOffset: oOffset, + baseLg: uint(data[n-1]), + filtersNum: (m - oOffset) / 4, + } + return b, nil +} + +func (r *Reader) getDataIter(dataBH blockHandle, slice *util.Range, checksum, fillCache bool) iterator.Iterator { + if r.cache != nil { + // Get/set block cache. + var err error + cache, ok := r.cache.Get(dataBH.offset, func() (ok bool, value interface{}, charge int, fin cache.SetFin) { + if !fillCache { + return + } + var dataBlock *block + dataBlock, err = r.readBlock(dataBH, checksum) + if err == nil { + ok = true + value = dataBlock + charge = int(dataBH.length) + } + return + }) + if err != nil { + return iterator.NewEmptyIterator(err) + } + if ok { + dataBlock := cache.Value().(*block) + if !dataBlock.checksum && (r.checksum || checksum) { + if !verifyChecksum(dataBlock.data) { + return iterator.NewEmptyIterator(errors.New("leveldb/table: Reader: invalid block (checksum mismatch)")) + } + dataBlock.checksum = true + } + iter := dataBlock.newIterator(slice, false, cache) + return iter + } + } + dataBlock, err := r.readBlock(dataBH, checksum) + if err != nil { + return iterator.NewEmptyIterator(err) + } + iter := dataBlock.newIterator(slice, false, nil) + return iter +} + +// NewIterator creates an iterator from the table. +// +// Slice allows slicing the iterator to only contains keys in the given +// range. A nil Range.Start is treated as a key before all keys in the +// table. And a nil Range.Limit is treated as a key after all keys in +// the table. +// +// The returned iterator is not goroutine-safe and should be released +// when not used. +// +// Also read Iterator documentation of the leveldb/iterator package. + +func (r *Reader) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { + if r.err != nil { + return iterator.NewEmptyIterator(r.err) + } + + index := &indexIter{ + blockIter: *r.indexBlock.newIterator(slice, true, nil), + tableReader: r, + slice: slice, + checksum: ro.GetStrict(opt.StrictBlockChecksum), + fillCache: !ro.GetDontFillCache(), + } + return iterator.NewIndexedIterator(index, r.strictIter || ro.GetStrict(opt.StrictIterator), false) +} + +// Find finds key/value pair whose key is greater than or equal to the +// given key. It returns ErrNotFound if the table doesn't contain +// such pair. +// +// The caller should not modify the contents of the returned slice, but +// it is safe to modify the contents of the argument after Find returns. +func (r *Reader) Find(key []byte, ro *opt.ReadOptions) (rkey, value []byte, err error) { + if r.err != nil { + err = r.err + return + } + + index := r.indexBlock.newIterator(nil, true, nil) + defer index.Release() + if !index.Seek(key) { + err = index.Error() + if err == nil { + err = ErrNotFound + } + return + } + dataBH, n := decodeBlockHandle(index.Value()) + if n == 0 { + err = errors.New("leveldb/table: Reader: invalid table (bad data block handle)") + return + } + if r.filterBlock != nil && !r.filterBlock.contains(dataBH.offset, key) { + err = ErrNotFound + return + } + data := r.getDataIter(dataBH, nil, ro.GetStrict(opt.StrictBlockChecksum), !ro.GetDontFillCache()) + defer data.Release() + if !data.Seek(key) { + err = data.Error() + if err == nil { + err = ErrNotFound + } + return + } + rkey = data.Key() + value = data.Value() + return +} + +// Get gets the value for the given key. It returns errors.ErrNotFound +// if the table does not contain the key. +// +// The caller should not modify the contents of the returned slice, but +// it is safe to modify the contents of the argument after Get returns. +func (r *Reader) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) { + if r.err != nil { + err = r.err + return + } + + rkey, value, err := r.Find(key, ro) + if err == nil && r.cmp.Compare(rkey, key) != 0 { + value = nil + err = ErrNotFound + } + return +} + +// OffsetOf returns approximate offset for the given key. +// +// It is safe to modify the contents of the argument after Get returns. +func (r *Reader) OffsetOf(key []byte) (offset int64, err error) { + if r.err != nil { + err = r.err + return + } + + index := r.indexBlock.newIterator(nil, true, nil) + defer index.Release() + if index.Seek(key) { + dataBH, n := decodeBlockHandle(index.Value()) + if n == 0 { + err = errors.New("leveldb/table: Reader: invalid table (bad data block handle)") + return + } + offset = int64(dataBH.offset) + return + } + err = index.Error() + if err == nil { + offset = r.dataEnd + } + return +} + +// NewReader creates a new initialized table reader for the file. +// The cache is optional and can be nil. +func NewReader(f io.ReaderAt, size int64, cache cache.Namespace, o *opt.Options) *Reader { + r := &Reader{ + reader: f, + cache: cache, + cmp: o.GetComparer(), + checksum: o.GetStrict(opt.StrictBlockChecksum), + strictIter: o.GetStrict(opt.StrictIterator), + } + if f == nil { + r.err = errors.New("leveldb/table: Reader: nil file") + return r + } + if size < footerLen { + r.err = errors.New("leveldb/table: Reader: invalid table (file size is too small)") + return r + } + var footer [footerLen]byte + if _, err := r.reader.ReadAt(footer[:], size-footerLen); err != nil && err != io.EOF { + r.err = fmt.Errorf("leveldb/table: Reader: invalid table (could not read footer): %v", err) + } + if string(footer[footerLen-len(magic):footerLen]) != magic { + r.err = errors.New("leveldb/table: Reader: invalid table (bad magic number)") + return r + } + // Decode the metaindex block handle. + metaBH, n := decodeBlockHandle(footer[:]) + if n == 0 { + r.err = errors.New("leveldb/table: Reader: invalid table (bad metaindex block handle)") + return r + } + // Decode the index block handle. + indexBH, n := decodeBlockHandle(footer[n:]) + if n == 0 { + r.err = errors.New("leveldb/table: Reader: invalid table (bad index block handle)") + return r + } + // Read index block. + r.indexBlock, r.err = r.readBlock(indexBH, true) + if r.err != nil { + return r + } + // Read metaindex block. + metaBlock, err := r.readBlock(metaBH, true) + if err != nil { + r.err = err + return r + } + // Set data end. + r.dataEnd = int64(metaBH.offset) + metaIter := metaBlock.newIterator(nil, false, nil) + for metaIter.Next() { + key := string(metaIter.Key()) + if !strings.HasPrefix(key, "filter.") { + continue + } + fn := key[7:] + var filter filter.Filter + if f0 := o.GetFilter(); f0 != nil && f0.Name() == fn { + filter = f0 + } else { + for _, f0 := range o.GetAltFilters() { + if f0.Name() == fn { + filter = f0 + break + } + } + } + if filter != nil { + filterBH, n := decodeBlockHandle(metaIter.Value()) + if n == 0 { + continue + } + // Update data end. + r.dataEnd = int64(filterBH.offset) + filterBlock, err := r.readFilterBlock(filterBH, filter) + if err != nil { + continue + } + r.filterBlock = filterBlock + break + } + } + metaIter.Release() + return r +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table.go new file mode 100644 index 000000000..c0ac70d9e --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table.go @@ -0,0 +1,177 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package table allows read and write sorted key/value. +package table + +import ( + "encoding/binary" +) + +/* +Table: + +Table is consist of one or more data blocks, an optional filter block +a metaindex block, an index block and a table footer. Metaindex block +is a special block used to keep parameters of the table, such as filter +block name and its block handle. Index block is a special block used to +keep record of data blocks offset and length, index block use one as +restart interval. The key used by index block are the last key of preceding +block, shorter separator of adjacent blocks or shorter successor of the +last key of the last block. Filter block is an optional block contains +sequence of filter data generated by a filter generator. + +Table data structure: + + optional + / + +--------------+--------------+--------------+------+-------+-----------------+-------------+--------+ + | data block 1 | ... | data block n | filter block | metaindex block | index block | footer | + +--------------+--------------+--------------+--------------+-----------------+-------------+--------+ + + Each block followed by a 5-bytes trailer contains compression type and checksum. + +Table block trailer: + + +---------------------------+-------------------+ + | compression type (1-byte) | checksum (4-byte) | + +---------------------------+-------------------+ + + The checksum is a CRC-32 computed using Castagnoli's polynomial. Compression + type also included in the checksum. + +Table footer: + + +------------------- 40-bytes -------------------+ + / \ + +------------------------+--------------------+------+-----------------+ + | metaindex block handle / index block handle / ---- | magic (8-bytes) | + +------------------------+--------------------+------+-----------------+ + + The magic are first 64-bit of SHA-1 sum of "http://code.google.com/p/leveldb/". + +NOTE: All fixed-length integer are little-endian. +*/ + +/* +Block: + +Block is consist of one or more key/value entries and a block trailer. +Block entry shares key prefix with its preceding key until a restart +point reached. A block should contains at least one restart point. +First restart point are always zero. + +Block data structure: + + + restart point + restart point (depends on restart interval) + / / + +---------------+---------------+---------------+---------------+---------+ + | block entry 1 | block entry 2 | ... | block entry n | trailer | + +---------------+---------------+---------------+---------------+---------+ + +Key/value entry: + + +---- key len ----+ + / \ + +-------+---------+-----------+---------+--------------------+--------------+----------------+ + | shared (varint) | not shared (varint) | value len (varint) | key (varlen) | value (varlen) | + +-----------------+---------------------+--------------------+--------------+----------------+ + + Block entry shares key prefix with its preceding key: + Conditions: + restart_interval=2 + entry one : key=deck,value=v1 + entry two : key=dock,value=v2 + entry three: key=duck,value=v3 + The entries will be encoded as follow: + + + restart point (offset=0) + restart point (offset=16) + / / + +-----+-----+-----+----------+--------+-----+-----+-----+---------+--------+-----+-----+-----+----------+--------+ + | 0 | 4 | 2 | "deck" | "v1" | 1 | 3 | 2 | "ock" | "v2" | 0 | 4 | 2 | "duck" | "v3" | + +-----+-----+-----+----------+--------+-----+-----+-----+---------+--------+-----+-----+-----+----------+--------+ + \ / \ / \ / + +----------- entry one -----------+ +----------- entry two ----------+ +---------- entry three ----------+ + + The block trailer will contains two restart points: + + +------------+-----------+--------+ + | 0 | 16 | 2 | + +------------+-----------+---+----+ + \ / \ + +-- restart points --+ + restart points length + +Block trailer: + + +-- 4-bytes --+ + / \ + +-----------------+-----------------+-----------------+------------------------------+ + | restart point 1 | .... | restart point n | restart points len (4-bytes) | + +-----------------+-----------------+-----------------+------------------------------+ + + +NOTE: All fixed-length integer are little-endian. +*/ + +/* +Filter block: + +Filter block consist of one or more filter data and a filter block trailer. +The trailer contains filter data offsets, a trailer offset and a 1-byte base Lg. + +Filter block data structure: + + + offset 1 + offset 2 + offset n + trailer offset + / / / / + +---------------+---------------+---------------+---------+ + | filter data 1 | ... | filter data n | trailer | + +---------------+---------------+---------------+---------+ + +Filter block trailer: + + +- 4-bytes -+ + / \ + +---------------+---------------+---------------+-------------------------+------------------+ + | offset 1 | .... | offset n | filter offset (4-bytes) | base Lg (1-byte) | + +-------------- +---------------+---------------+-------------------------+------------------+ + + +NOTE: All fixed-length integer are little-endian. +*/ + +const ( + blockTrailerLen = 5 + footerLen = 48 + + magic = "\x57\xfb\x80\x8b\x24\x75\x47\xdb" + + // The block type gives the per-block compression format. + // These constants are part of the file format and should not be changed. + blockTypeNoCompression = 0 + blockTypeSnappyCompression = 1 + + // Generate new filter every 2KB of data + filterBaseLg = 11 + filterBase = 1 << filterBaseLg +) + +type blockHandle struct { + offset, length uint64 +} + +func decodeBlockHandle(src []byte) (blockHandle, int) { + offset, n := binary.Uvarint(src) + length, m := binary.Uvarint(src[n:]) + if n == 0 || m == 0 { + return blockHandle{}, 0 + } + return blockHandle{offset, length}, n + m +} + +func encodeBlockHandle(dst []byte, b blockHandle) int { + n := binary.PutUvarint(dst, b.offset) + m := binary.PutUvarint(dst[n:], b.length) + return n + m +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_suite_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_suite_test.go new file mode 100644 index 000000000..bc9eb83cc --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_suite_test.go @@ -0,0 +1,17 @@ +package table + +import ( + "testing" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + + "github.com/syndtr/goleveldb/leveldb/testutil" +) + +func TestTable(t *testing.T) { + testutil.RunDefer() + + RegisterFailHandler(Fail) + RunSpecs(t, "Table Suite") +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_test.go new file mode 100644 index 000000000..d7d3b2a4b --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/table_test.go @@ -0,0 +1,119 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package table + +import ( + "bytes" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/testutil" + "github.com/syndtr/goleveldb/leveldb/util" +) + +type tableWrapper struct { + *Reader +} + +func (t tableWrapper) TestFind(key []byte) (rkey, rvalue []byte, err error) { + return t.Reader.Find(key, nil) +} + +func (t tableWrapper) TestGet(key []byte) (value []byte, err error) { + return t.Reader.Get(key, nil) +} + +func (t tableWrapper) TestNewIterator(slice *util.Range) iterator.Iterator { + return t.Reader.NewIterator(slice, nil) +} + +var _ = testutil.Defer(func() { + Describe("Table", func() { + Describe("approximate offset test", func() { + var ( + buf = &bytes.Buffer{} + o = &opt.Options{ + BlockSize: 1024, + Compression: opt.NoCompression, + } + ) + + // Building the table. + tw := NewWriter(buf, o) + tw.Append([]byte("k01"), []byte("hello")) + tw.Append([]byte("k02"), []byte("hello2")) + tw.Append([]byte("k03"), bytes.Repeat([]byte{'x'}, 10000)) + tw.Append([]byte("k04"), bytes.Repeat([]byte{'x'}, 200000)) + tw.Append([]byte("k05"), bytes.Repeat([]byte{'x'}, 300000)) + tw.Append([]byte("k06"), []byte("hello3")) + tw.Append([]byte("k07"), bytes.Repeat([]byte{'x'}, 100000)) + err := tw.Close() + + It("Should be able to approximate offset of a key correctly", func() { + Expect(err).ShouldNot(HaveOccurred()) + + tr := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()), nil, o) + CheckOffset := func(key string, expect, threshold int) { + offset, err := tr.OffsetOf([]byte(key)) + Expect(err).ShouldNot(HaveOccurred()) + Expect(offset).Should(BeNumerically("~", expect, threshold), "Offset of key %q", key) + } + + CheckOffset("k0", 0, 0) + CheckOffset("k01a", 0, 0) + CheckOffset("k02", 0, 0) + CheckOffset("k03", 0, 0) + CheckOffset("k04", 10000, 1000) + CheckOffset("k04a", 210000, 1000) + CheckOffset("k05", 210000, 1000) + CheckOffset("k06", 510000, 1000) + CheckOffset("k07", 510000, 1000) + CheckOffset("xyz", 610000, 2000) + }) + }) + + Describe("read test", func() { + Build := func(kv testutil.KeyValue) testutil.DB { + o := &opt.Options{ + BlockSize: 512, + BlockRestartInterval: 3, + } + buf := &bytes.Buffer{} + + // Building the table. + tw := NewWriter(buf, o) + kv.Iterate(func(i int, key, value []byte) { + tw.Append(key, value) + }) + tw.Close() + + // Opening the table. + tr := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()), nil, o) + return tableWrapper{tr} + } + Test := func(kv *testutil.KeyValue, body func(r *Reader)) func() { + return func() { + db := Build(*kv) + if body != nil { + body(db.(tableWrapper).Reader) + } + testutil.KeyValueTesting(nil, db, *kv) + } + } + + testutil.AllKeyValueTesting(nil, Build) + Describe("with one key per block", Test(testutil.KeyValue_Generate(nil, 9, 1, 10, 512, 512), func(r *Reader) { + It("should have correct blocks number", func() { + Expect(r.indexBlock.restartsLen).Should(Equal(9)) + }) + })) + }) + }) +}) diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/writer.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/writer.go new file mode 100644 index 000000000..4e19e93a9 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/writer.go @@ -0,0 +1,379 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package table + +import ( + "encoding/binary" + "errors" + "fmt" + "io" + + "code.google.com/p/snappy-go/snappy" + + "github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/syndtr/goleveldb/leveldb/filter" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/util" +) + +func sharedPrefixLen(a, b []byte) int { + i, n := 0, len(a) + if n > len(b) { + n = len(b) + } + for i < n && a[i] == b[i] { + i++ + } + return i +} + +type blockWriter struct { + restartInterval int + buf util.Buffer + nEntries int + prevKey []byte + restarts []uint32 + scratch []byte +} + +func (w *blockWriter) append(key, value []byte) { + nShared := 0 + if w.nEntries%w.restartInterval == 0 { + w.restarts = append(w.restarts, uint32(w.buf.Len())) + } else { + nShared = sharedPrefixLen(w.prevKey, key) + } + n := binary.PutUvarint(w.scratch[0:], uint64(nShared)) + n += binary.PutUvarint(w.scratch[n:], uint64(len(key)-nShared)) + n += binary.PutUvarint(w.scratch[n:], uint64(len(value))) + w.buf.Write(w.scratch[:n]) + w.buf.Write(key[nShared:]) + w.buf.Write(value) + w.prevKey = append(w.prevKey[:0], key...) + w.nEntries++ +} + +func (w *blockWriter) finish() { + // Write restarts entry. + if w.nEntries == 0 { + // Must have at least one restart entry. + w.restarts = append(w.restarts, 0) + } + w.restarts = append(w.restarts, uint32(len(w.restarts))) + for _, x := range w.restarts { + buf4 := w.buf.Alloc(4) + binary.LittleEndian.PutUint32(buf4, x) + } +} + +func (w *blockWriter) reset() { + w.buf.Reset() + w.nEntries = 0 + w.restarts = w.restarts[:0] +} + +func (w *blockWriter) bytesLen() int { + restartsLen := len(w.restarts) + if restartsLen == 0 { + restartsLen = 1 + } + return w.buf.Len() + 4*restartsLen + 4 +} + +type filterWriter struct { + generator filter.FilterGenerator + buf util.Buffer + nKeys int + offsets []uint32 +} + +func (w *filterWriter) add(key []byte) { + if w.generator == nil { + return + } + w.generator.Add(key) + w.nKeys++ +} + +func (w *filterWriter) flush(offset uint64) { + if w.generator == nil { + return + } + for x := int(offset / filterBase); x > len(w.offsets); { + w.generate() + } +} + +func (w *filterWriter) finish() { + if w.generator == nil { + return + } + // Generate last keys. + + if w.nKeys > 0 { + w.generate() + } + w.offsets = append(w.offsets, uint32(w.buf.Len())) + for _, x := range w.offsets { + buf4 := w.buf.Alloc(4) + binary.LittleEndian.PutUint32(buf4, x) + } + w.buf.WriteByte(filterBaseLg) +} + +func (w *filterWriter) generate() { + // Record offset. + w.offsets = append(w.offsets, uint32(w.buf.Len())) + // Generate filters. + if w.nKeys > 0 { + w.generator.Generate(&w.buf) + w.nKeys = 0 + } +} + +// Writer is a table writer. +type Writer struct { + writer io.Writer + err error + // Options + cmp comparer.Comparer + filter filter.Filter + compression opt.Compression + blockSize int + + dataBlock blockWriter + indexBlock blockWriter + filterBlock filterWriter + pendingBH blockHandle + offset uint64 + nEntries int + // Scratch allocated enough for 5 uvarint. Block writer should not use + // first 20-bytes since it will be used to encode block handle, which + // then passed to the block writer itself. + scratch [50]byte + comparerScratch []byte + compressionScratch []byte +} + +func (w *Writer) writeBlock(buf *util.Buffer, compression opt.Compression) (bh blockHandle, err error) { + // Compress the buffer if necessary. + var b []byte + if compression == opt.SnappyCompression { + // Allocate scratch enough for compression and block trailer. + if n := snappy.MaxEncodedLen(buf.Len()) + blockTrailerLen; len(w.compressionScratch) < n { + w.compressionScratch = make([]byte, n) + } + var compressed []byte + compressed, err = snappy.Encode(w.compressionScratch, buf.Bytes()) + if err != nil { + return + } + n := len(compressed) + b = compressed[:n+blockTrailerLen] + b[n] = blockTypeSnappyCompression + } else { + tmp := buf.Alloc(blockTrailerLen) + tmp[0] = blockTypeNoCompression + b = buf.Bytes() + } + + // Calculate the checksum. + n := len(b) - 4 + checksum := util.NewCRC(b[:n]).Value() + binary.LittleEndian.PutUint32(b[n:], checksum) + + // Write the buffer to the file. + _, err = w.writer.Write(b) + if err != nil { + return + } + bh = blockHandle{w.offset, uint64(len(b) - blockTrailerLen)} + w.offset += uint64(len(b)) + return +} + +func (w *Writer) flushPendingBH(key []byte) { + if w.pendingBH.length == 0 { + return + } + var separator []byte + if len(key) == 0 { + separator = w.cmp.Successor(w.comparerScratch[:0], w.dataBlock.prevKey) + } else { + separator = w.cmp.Separator(w.comparerScratch[:0], w.dataBlock.prevKey, key) + } + if separator == nil { + separator = w.dataBlock.prevKey + } else { + w.comparerScratch = separator + } + n := encodeBlockHandle(w.scratch[:20], w.pendingBH) + // Append the block handle to the index block. + w.indexBlock.append(separator, w.scratch[:n]) + // Reset prev key of the data block. + w.dataBlock.prevKey = w.dataBlock.prevKey[:0] + // Clear pending block handle. + w.pendingBH = blockHandle{} +} + +func (w *Writer) finishBlock() error { + w.dataBlock.finish() + bh, err := w.writeBlock(&w.dataBlock.buf, w.compression) + if err != nil { + return err + } + w.pendingBH = bh + // Reset the data block. + w.dataBlock.reset() + // Flush the filter block. + w.filterBlock.flush(w.offset) + return nil +} + +// Append appends key/value pair to the table. The keys passed must +// be in increasing order. +// +// It is safe to modify the contents of the arguments after Append returns. +func (w *Writer) Append(key, value []byte) error { + if w.err != nil { + return w.err + } + if w.nEntries > 0 && w.cmp.Compare(w.dataBlock.prevKey, key) >= 0 { + w.err = fmt.Errorf("leveldb/table: Writer: keys are not in increasing order: %q, %q", w.dataBlock.prevKey, key) + return w.err + } + + w.flushPendingBH(key) + // Append key/value pair to the data block. + w.dataBlock.append(key, value) + // Add key to the filter block. + w.filterBlock.add(key) + + // Finish the data block if block size target reached. + if w.dataBlock.bytesLen() >= w.blockSize { + if err := w.finishBlock(); err != nil { + w.err = err + return w.err + } + } + w.nEntries++ + return nil +} + +// BlocksLen returns number of blocks written so far. +func (w *Writer) BlocksLen() int { + n := w.indexBlock.nEntries + if w.pendingBH.length > 0 { + // Includes the pending block. + n++ + } + return n +} + +// EntriesLen returns number of entries added so far. +func (w *Writer) EntriesLen() int { + return w.nEntries +} + +// BytesLen returns number of bytes written so far. +func (w *Writer) BytesLen() int { + return int(w.offset) +} + +// Close will finalize the table. Calling Append is not possible +// after Close, but calling BlocksLen, EntriesLen and BytesLen +// is still possible. +func (w *Writer) Close() error { + if w.err != nil { + return w.err + } + + // Write the last data block. Or empty data block if there + // aren't any data blocks at all. + if w.dataBlock.nEntries > 0 || w.nEntries == 0 { + if err := w.finishBlock(); err != nil { + w.err = err + return w.err + } + } + w.flushPendingBH(nil) + + // Write the filter block. + var filterBH blockHandle + w.filterBlock.finish() + if buf := &w.filterBlock.buf; buf.Len() > 0 { + filterBH, w.err = w.writeBlock(buf, opt.NoCompression) + if w.err != nil { + return w.err + } + } + + // Write the metaindex block. + if filterBH.length > 0 { + key := []byte("filter." + w.filter.Name()) + n := encodeBlockHandle(w.scratch[:20], filterBH) + w.dataBlock.append(key, w.scratch[:n]) + } + w.dataBlock.finish() + metaindexBH, err := w.writeBlock(&w.dataBlock.buf, w.compression) + if err != nil { + w.err = err + return w.err + } + + // Write the index block. + w.indexBlock.finish() + indexBH, err := w.writeBlock(&w.indexBlock.buf, w.compression) + if err != nil { + w.err = err + return w.err + } + + // Write the table footer. + footer := w.scratch[:footerLen] + for i := range footer { + footer[i] = 0 + } + n := encodeBlockHandle(footer, metaindexBH) + encodeBlockHandle(footer[n:], indexBH) + copy(footer[footerLen-len(magic):], magic) + if _, err := w.writer.Write(footer); err != nil { + w.err = err + return w.err + } + w.offset += footerLen + + w.err = errors.New("leveldb/table: writer is closed") + return nil +} + +// NewWriter creates a new initialized table writer for the file. +// +// Table writer is not goroutine-safe. +func NewWriter(f io.Writer, o *opt.Options) *Writer { + w := &Writer{ + writer: f, + cmp: o.GetComparer(), + filter: o.GetFilter(), + compression: o.GetCompression(), + blockSize: o.GetBlockSize(), + comparerScratch: make([]byte, 0), + } + // data block + w.dataBlock.restartInterval = o.GetBlockRestartInterval() + // The first 20-bytes are used for encoding block handle. + w.dataBlock.scratch = w.scratch[20:] + // index block + w.indexBlock.restartInterval = 1 + w.indexBlock.scratch = w.scratch[20:] + // filter block + if w.filter != nil { + w.filterBlock.generator = w.filter.NewGenerator() + w.filterBlock.flush(0) + } + return w +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/db.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/db.go new file mode 100644 index 000000000..4b87b5ef6 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/db.go @@ -0,0 +1,216 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package testutil + +import ( + "fmt" + "math/rand" + + . "github.com/onsi/gomega" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/util" +) + +type DB interface{} + +type Put interface { + TestPut(key []byte, value []byte) error +} + +type Delete interface { + TestDelete(key []byte) error +} + +type Find interface { + TestFind(key []byte) (rkey, rvalue []byte, err error) +} + +type Get interface { + TestGet(key []byte) (value []byte, err error) +} + +type NewIterator interface { + TestNewIterator(slice *util.Range) iterator.Iterator +} + +type DBAct int + +func (a DBAct) String() string { + switch a { + case DBNone: + return "none" + case DBPut: + return "put" + case DBOverwrite: + return "overwrite" + case DBDelete: + return "delete" + case DBDeleteNA: + return "delete_na" + } + return "unknown" +} + +const ( + DBNone DBAct = iota + DBPut + DBOverwrite + DBDelete + DBDeleteNA +) + +type DBTesting struct { + Rand *rand.Rand + DB interface { + Get + Put + Delete + } + PostFn func(t *DBTesting) + Deleted, Present KeyValue + Act, LastAct DBAct + ActKey, LastActKey []byte +} + +func (t *DBTesting) post() { + if t.PostFn != nil { + t.PostFn(t) + } +} + +func (t *DBTesting) setAct(act DBAct, key []byte) { + t.LastAct, t.Act = t.Act, act + t.LastActKey, t.ActKey = t.ActKey, key +} + +func (t *DBTesting) text() string { + return fmt.Sprintf("last action was <%v> %q, <%v> %q", t.LastAct, t.LastActKey, t.Act, t.ActKey) +} + +func (t *DBTesting) Text() string { + return "DBTesting " + t.text() +} + +func (t *DBTesting) TestPresentKV(key, value []byte) { + rvalue, err := t.DB.TestGet(key) + Expect(err).ShouldNot(HaveOccurred(), "Get on key %q, %s", key, t.text()) + Expect(rvalue).Should(Equal(value), "Value for key %q, %s", key, t.text()) +} + +func (t *DBTesting) TestAllPresent() { + t.Present.IterateShuffled(t.Rand, func(i int, key, value []byte) { + t.TestPresentKV(key, value) + }) +} + +func (t *DBTesting) TestDeletedKey(key []byte) { + _, err := t.DB.TestGet(key) + Expect(err).Should(Equal(util.ErrNotFound), "Get on deleted key %q, %s", key, t.text()) +} + +func (t *DBTesting) TestAllDeleted() { + t.Deleted.IterateShuffled(t.Rand, func(i int, key, value []byte) { + t.TestDeletedKey(key) + }) +} + +func (t *DBTesting) TestAll() { + dn := t.Deleted.Len() + pn := t.Present.Len() + ShuffledIndex(t.Rand, dn+pn, 1, func(i int) { + if i >= dn { + key, value := t.Present.Index(i - dn) + t.TestPresentKV(key, value) + } else { + t.TestDeletedKey(t.Deleted.KeyAt(i)) + } + }) +} + +func (t *DBTesting) Put(key, value []byte) { + if new := t.Present.PutU(key, value); new { + t.setAct(DBPut, key) + } else { + t.setAct(DBOverwrite, key) + } + t.Deleted.Delete(key) + err := t.DB.TestPut(key, value) + Expect(err).ShouldNot(HaveOccurred(), t.Text()) + t.TestPresentKV(key, value) + t.post() +} + +func (t *DBTesting) PutRandom() bool { + if t.Deleted.Len() > 0 { + i := t.Rand.Intn(t.Deleted.Len()) + key, value := t.Deleted.Index(i) + t.Put(key, value) + return true + } + return false +} + +func (t *DBTesting) Delete(key []byte) { + if exist, value := t.Present.Delete(key); exist { + t.setAct(DBDelete, key) + t.Deleted.PutU(key, value) + } else { + t.setAct(DBDeleteNA, key) + } + err := t.DB.TestDelete(key) + Expect(err).ShouldNot(HaveOccurred(), t.Text()) + t.TestDeletedKey(key) + t.post() +} + +func (t *DBTesting) DeleteRandom() bool { + if t.Present.Len() > 0 { + i := t.Rand.Intn(t.Present.Len()) + t.Delete(t.Present.KeyAt(i)) + return true + } + return false +} + +func (t *DBTesting) RandomAct(round int) { + for i := 0; i < round; i++ { + if t.Rand.Int()%2 == 0 { + t.PutRandom() + } else { + t.DeleteRandom() + } + } +} + +func DoDBTesting(t *DBTesting) { + if t.Rand == nil { + t.Rand = NewRand() + } + + t.DeleteRandom() + t.PutRandom() + t.DeleteRandom() + t.DeleteRandom() + for i := t.Deleted.Len() / 2; i >= 0; i-- { + t.PutRandom() + } + t.RandomAct((t.Deleted.Len() + t.Present.Len()) * 10) + + // Additional iterator testing + if db, ok := t.DB.(NewIterator); ok { + iter := db.TestNewIterator(nil) + Expect(iter.Error()).NotTo(HaveOccurred()) + + it := IteratorTesting{ + KeyValue: t.Present, + Iter: iter, + } + + DoIteratorTesting(&it) + } +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/iter.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/iter.go new file mode 100644 index 000000000..df6d9db6a --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/iter.go @@ -0,0 +1,327 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package testutil + +import ( + "fmt" + "math/rand" + + . "github.com/onsi/gomega" + + "github.com/syndtr/goleveldb/leveldb/iterator" +) + +type IterAct int + +func (a IterAct) String() string { + switch a { + case IterNone: + return "none" + case IterFirst: + return "first" + case IterLast: + return "last" + case IterPrev: + return "prev" + case IterNext: + return "next" + case IterSeek: + return "seek" + case IterSOI: + return "soi" + case IterEOI: + return "eoi" + } + return "unknown" +} + +const ( + IterNone IterAct = iota + IterFirst + IterLast + IterPrev + IterNext + IterSeek + IterSOI + IterEOI +) + +type IteratorTesting struct { + KeyValue + Iter iterator.Iterator + Rand *rand.Rand + PostFn func(t *IteratorTesting) + Pos int + Act, LastAct IterAct + + once bool +} + +func (t *IteratorTesting) init() { + if !t.once { + t.Pos = -1 + t.once = true + } +} + +func (t *IteratorTesting) post() { + if t.PostFn != nil { + t.PostFn(t) + } +} + +func (t *IteratorTesting) setAct(act IterAct) { + t.LastAct, t.Act = t.Act, act +} + +func (t *IteratorTesting) text() string { + return fmt.Sprintf("at pos %d and last action was <%v> -> <%v>", t.Pos, t.LastAct, t.Act) +} + +func (t *IteratorTesting) Text() string { + return "IteratorTesting is " + t.text() +} + +func (t *IteratorTesting) IsFirst() bool { + t.init() + return t.Len() > 0 && t.Pos == 0 +} + +func (t *IteratorTesting) IsLast() bool { + t.init() + return t.Len() > 0 && t.Pos == t.Len()-1 +} + +func (t *IteratorTesting) TestKV() { + t.init() + key, value := t.Index(t.Pos) + Expect(t.Iter.Key()).NotTo(BeNil()) + Expect(t.Iter.Key()).Should(Equal(key), "Key is invalid, %s", t.text()) + Expect(t.Iter.Value()).Should(Equal(value), "Value for key %q, %s", key, t.text()) +} + +func (t *IteratorTesting) First() { + t.init() + t.setAct(IterFirst) + + ok := t.Iter.First() + Expect(t.Iter.Error()).ShouldNot(HaveOccurred()) + if t.Len() > 0 { + t.Pos = 0 + Expect(ok).Should(BeTrue(), t.Text()) + t.TestKV() + } else { + t.Pos = -1 + Expect(ok).ShouldNot(BeTrue(), t.Text()) + } + t.post() +} + +func (t *IteratorTesting) Last() { + t.init() + t.setAct(IterLast) + + ok := t.Iter.Last() + Expect(t.Iter.Error()).ShouldNot(HaveOccurred()) + if t.Len() > 0 { + t.Pos = t.Len() - 1 + Expect(ok).Should(BeTrue(), t.Text()) + t.TestKV() + } else { + t.Pos = 0 + Expect(ok).ShouldNot(BeTrue(), t.Text()) + } + t.post() +} + +func (t *IteratorTesting) Next() { + t.init() + t.setAct(IterNext) + + ok := t.Iter.Next() + Expect(t.Iter.Error()).ShouldNot(HaveOccurred()) + if t.Pos < t.Len()-1 { + t.Pos++ + Expect(ok).Should(BeTrue(), t.Text()) + t.TestKV() + } else { + t.Pos = t.Len() + Expect(ok).ShouldNot(BeTrue(), t.Text()) + } + t.post() +} + +func (t *IteratorTesting) Prev() { + t.init() + t.setAct(IterPrev) + + ok := t.Iter.Prev() + Expect(t.Iter.Error()).ShouldNot(HaveOccurred()) + if t.Pos > 0 { + t.Pos-- + Expect(ok).Should(BeTrue(), t.Text()) + t.TestKV() + } else { + t.Pos = -1 + Expect(ok).ShouldNot(BeTrue(), t.Text()) + } + t.post() +} + +func (t *IteratorTesting) Seek(i int) { + t.init() + t.setAct(IterSeek) + + key, _ := t.Index(i) + oldKey, _ := t.IndexOrNil(t.Pos) + + ok := t.Iter.Seek(key) + Expect(t.Iter.Error()).ShouldNot(HaveOccurred()) + Expect(ok).Should(BeTrue(), fmt.Sprintf("Seek from key %q to %q, to pos %d, %s", oldKey, key, i, t.text())) + + t.Pos = i + t.TestKV() + t.post() +} + +func (t *IteratorTesting) SeekInexact(i int) { + t.init() + t.setAct(IterSeek) + var key0 []byte + key1, _ := t.Index(i) + if i > 0 { + key0, _ = t.Index(i - 1) + } + key := BytesSeparator(key0, key1) + oldKey, _ := t.IndexOrNil(t.Pos) + + ok := t.Iter.Seek(key) + Expect(t.Iter.Error()).ShouldNot(HaveOccurred()) + Expect(ok).Should(BeTrue(), fmt.Sprintf("Seek from key %q to %q (%q), to pos %d, %s", oldKey, key, key1, i, t.text())) + + t.Pos = i + t.TestKV() + t.post() +} + +func (t *IteratorTesting) SeekKey(key []byte) { + t.init() + t.setAct(IterSeek) + oldKey, _ := t.IndexOrNil(t.Pos) + i := t.Search(key) + + ok := t.Iter.Seek(key) + Expect(t.Iter.Error()).ShouldNot(HaveOccurred()) + if i < t.Len() { + key_, _ := t.Index(i) + Expect(ok).Should(BeTrue(), fmt.Sprintf("Seek from key %q to %q (%q), to pos %d, %s", oldKey, key, key_, i, t.text())) + t.Pos = i + t.TestKV() + } else { + Expect(ok).ShouldNot(BeTrue(), fmt.Sprintf("Seek from key %q to %q, %s", oldKey, key, t.text())) + } + + t.Pos = i + t.post() +} + +func (t *IteratorTesting) SOI() { + t.init() + t.setAct(IterSOI) + Expect(t.Pos).Should(BeNumerically("<=", 0), t.Text()) + for i := 0; i < 3; i++ { + t.Prev() + } + t.post() +} + +func (t *IteratorTesting) EOI() { + t.init() + t.setAct(IterEOI) + Expect(t.Pos).Should(BeNumerically(">=", t.Len()-1), t.Text()) + for i := 0; i < 3; i++ { + t.Next() + } + t.post() +} + +func (t *IteratorTesting) WalkPrev(fn func(t *IteratorTesting)) { + t.init() + for old := t.Pos; t.Pos > 0; old = t.Pos { + fn(t) + Expect(t.Pos).Should(BeNumerically("<", old), t.Text()) + } +} + +func (t *IteratorTesting) WalkNext(fn func(t *IteratorTesting)) { + t.init() + for old := t.Pos; t.Pos < t.Len()-1; old = t.Pos { + fn(t) + Expect(t.Pos).Should(BeNumerically(">", old), t.Text()) + } +} + +func (t *IteratorTesting) PrevAll() { + t.WalkPrev(func(t *IteratorTesting) { + t.Prev() + }) +} + +func (t *IteratorTesting) NextAll() { + t.WalkNext(func(t *IteratorTesting) { + t.Next() + }) +} + +func DoIteratorTesting(t *IteratorTesting) { + if t.Rand == nil { + t.Rand = NewRand() + } + t.SOI() + t.NextAll() + t.First() + t.SOI() + t.NextAll() + t.EOI() + t.PrevAll() + t.Last() + t.EOI() + t.PrevAll() + t.SOI() + + t.NextAll() + t.PrevAll() + t.NextAll() + t.Last() + t.PrevAll() + t.First() + t.NextAll() + t.EOI() + + ShuffledIndex(t.Rand, t.Len(), 1, func(i int) { + t.Seek(i) + }) + + ShuffledIndex(t.Rand, t.Len(), 1, func(i int) { + t.SeekInexact(i) + }) + + ShuffledIndex(t.Rand, t.Len(), 1, func(i int) { + t.Seek(i) + if i%2 != 0 { + t.PrevAll() + t.SOI() + } else { + t.NextAll() + t.EOI() + } + }) + + for _, key := range []string{"", "foo", "bar", "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"} { + t.SeekKey([]byte(key)) + } +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/kv.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/kv.go new file mode 100644 index 000000000..471d5708c --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/kv.go @@ -0,0 +1,352 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package testutil + +import ( + "fmt" + "math/rand" + "sort" + "strings" + + "github.com/syndtr/goleveldb/leveldb/util" +) + +type KeyValueEntry struct { + key, value []byte +} + +type KeyValue struct { + entries []KeyValueEntry + nbytes int +} + +func (kv *KeyValue) Put(key, value []byte) { + if n := len(kv.entries); n > 0 && cmp.Compare(kv.entries[n-1].key, key) >= 0 { + panic(fmt.Sprintf("Put: keys are not in increasing order: %q, %q", kv.entries[n-1].key, key)) + } + kv.entries = append(kv.entries, KeyValueEntry{key, value}) + kv.nbytes += len(key) + len(value) +} + +func (kv *KeyValue) PutString(key, value string) { + kv.Put([]byte(key), []byte(value)) +} + +func (kv *KeyValue) PutU(key, value []byte) bool { + if i, exist := kv.Get(key); !exist { + if i < kv.Len() { + kv.entries = append(kv.entries[:i+1], kv.entries[i:]...) + kv.entries[i] = KeyValueEntry{key, value} + } else { + kv.entries = append(kv.entries, KeyValueEntry{key, value}) + } + kv.nbytes += len(key) + len(value) + return true + } else { + kv.nbytes += len(value) - len(kv.ValueAt(i)) + kv.entries[i].value = value + } + return false +} + +func (kv *KeyValue) PutUString(key, value string) bool { + return kv.PutU([]byte(key), []byte(value)) +} + +func (kv *KeyValue) Delete(key []byte) (exist bool, value []byte) { + i, exist := kv.Get(key) + if exist { + value = kv.entries[i].value + kv.DeleteIndex(i) + } + return +} + +func (kv *KeyValue) DeleteIndex(i int) bool { + if i < kv.Len() { + kv.nbytes -= len(kv.KeyAt(i)) + len(kv.ValueAt(i)) + kv.entries = append(kv.entries[:i], kv.entries[i+1:]...) + return true + } + return false +} + +func (kv KeyValue) Len() int { + return len(kv.entries) +} + +func (kv *KeyValue) Size() int { + return kv.nbytes +} + +func (kv KeyValue) KeyAt(i int) []byte { + return kv.entries[i].key +} + +func (kv KeyValue) ValueAt(i int) []byte { + return kv.entries[i].value +} + +func (kv KeyValue) Index(i int) (key, value []byte) { + if i < 0 || i >= len(kv.entries) { + panic(fmt.Sprintf("Index #%d: out of range", i)) + } + return kv.entries[i].key, kv.entries[i].value +} + +func (kv KeyValue) IndexInexact(i int) (key_, key, value []byte) { + key, value = kv.Index(i) + var key0 []byte + var key1 = kv.KeyAt(i) + if i > 0 { + key0 = kv.KeyAt(i - 1) + } + key_ = BytesSeparator(key0, key1) + return +} + +func (kv KeyValue) IndexOrNil(i int) (key, value []byte) { + if i >= 0 && i < len(kv.entries) { + return kv.entries[i].key, kv.entries[i].value + } + return nil, nil +} + +func (kv KeyValue) IndexString(i int) (key, value string) { + key_, _value := kv.Index(i) + return string(key_), string(_value) +} + +func (kv KeyValue) Search(key []byte) int { + return sort.Search(kv.Len(), func(i int) bool { + return cmp.Compare(kv.KeyAt(i), key) >= 0 + }) +} + +func (kv KeyValue) SearchString(key string) int { + return kv.Search([]byte(key)) +} + +func (kv KeyValue) Get(key []byte) (i int, exist bool) { + i = kv.Search(key) + if i < kv.Len() && cmp.Compare(kv.KeyAt(i), key) == 0 { + exist = true + } + return +} + +func (kv KeyValue) GetString(key string) (i int, exist bool) { + return kv.Get([]byte(key)) +} + +func (kv KeyValue) Iterate(fn func(i int, key, value []byte)) { + for i, x := range kv.entries { + fn(i, x.key, x.value) + } +} + +func (kv KeyValue) IterateString(fn func(i int, key, value string)) { + kv.Iterate(func(i int, key, value []byte) { + fn(i, string(key), string(value)) + }) +} + +func (kv KeyValue) IterateShuffled(rnd *rand.Rand, fn func(i int, key, value []byte)) { + ShuffledIndex(rnd, kv.Len(), 1, func(i int) { + fn(i, kv.entries[i].key, kv.entries[i].value) + }) +} + +func (kv KeyValue) IterateShuffledString(rnd *rand.Rand, fn func(i int, key, value string)) { + kv.IterateShuffled(rnd, func(i int, key, value []byte) { + fn(i, string(key), string(value)) + }) +} + +func (kv KeyValue) IterateInexact(fn func(i int, key_, key, value []byte)) { + for i := range kv.entries { + key_, key, value := kv.IndexInexact(i) + fn(i, key_, key, value) + } +} + +func (kv KeyValue) IterateInexactString(fn func(i int, key_, key, value string)) { + kv.IterateInexact(func(i int, key_, key, value []byte) { + fn(i, string(key_), string(key), string(value)) + }) +} + +func (kv KeyValue) Clone() KeyValue { + return KeyValue{append([]KeyValueEntry{}, kv.entries...), kv.nbytes} +} + +func (kv KeyValue) Slice(start, limit int) KeyValue { + if start < 0 || limit > kv.Len() { + panic(fmt.Sprintf("Slice %d .. %d: out of range", start, limit)) + } else if limit < start { + panic(fmt.Sprintf("Slice %d .. %d: invalid range", start, limit)) + } + return KeyValue{append([]KeyValueEntry{}, kv.entries[start:limit]...), kv.nbytes} +} + +func (kv KeyValue) SliceKey(start, limit []byte) KeyValue { + start_ := 0 + limit_ := kv.Len() + if start != nil { + start_ = kv.Search(start) + } + if limit != nil { + limit_ = kv.Search(limit) + } + return kv.Slice(start_, limit_) +} + +func (kv KeyValue) SliceKeyString(start, limit string) KeyValue { + return kv.SliceKey([]byte(start), []byte(limit)) +} + +func (kv KeyValue) SliceRange(r *util.Range) KeyValue { + if r != nil { + return kv.SliceKey(r.Start, r.Limit) + } + return kv.Clone() +} + +func (kv KeyValue) Range(start, limit int) (r util.Range) { + if kv.Len() > 0 { + if start == kv.Len() { + r.Start = BytesAfter(kv.KeyAt(start - 1)) + } else { + r.Start = kv.KeyAt(start) + } + } + if limit < kv.Len() { + r.Limit = kv.KeyAt(limit) + } + return +} + +func KeyValue_EmptyKey() *KeyValue { + kv := &KeyValue{} + kv.PutString("", "v") + return kv +} + +func KeyValue_EmptyValue() *KeyValue { + kv := &KeyValue{} + kv.PutString("abc", "") + kv.PutString("abcd", "") + return kv +} + +func KeyValue_OneKeyValue() *KeyValue { + kv := &KeyValue{} + kv.PutString("abc", "v") + return kv +} + +func KeyValue_BigValue() *KeyValue { + kv := &KeyValue{} + kv.PutString("big1", strings.Repeat("1", 200000)) + return kv +} + +func KeyValue_SpecialKey() *KeyValue { + kv := &KeyValue{} + kv.PutString("\xff\xff", "v3") + return kv +} + +func KeyValue_MultipleKeyValue() *KeyValue { + kv := &KeyValue{} + kv.PutString("a", "v") + kv.PutString("aa", "v1") + kv.PutString("aaa", "v2") + kv.PutString("aaacccccccccc", "v2") + kv.PutString("aaaccccccccccd", "v3") + kv.PutString("aaaccccccccccf", "v4") + kv.PutString("aaaccccccccccfg", "v5") + kv.PutString("ab", "v6") + kv.PutString("abc", "v7") + kv.PutString("abcd", "v8") + kv.PutString("accccccccccccccc", "v9") + kv.PutString("b", "v10") + kv.PutString("bb", "v11") + kv.PutString("bc", "v12") + kv.PutString("c", "v13") + kv.PutString("c1", "v13") + kv.PutString("czzzzzzzzzzzzzz", "v14") + kv.PutString("fffffffffffffff", "v15") + kv.PutString("g11", "v15") + kv.PutString("g111", "v15") + kv.PutString("g111\xff", "v15") + kv.PutString("zz", "v16") + kv.PutString("zzzzzzz", "v16") + kv.PutString("zzzzzzzzzzzzzzzz", "v16") + return kv +} + +var keymap = []byte("012345678ABCDEFGHIJKLMNOPQRSTUVWXYabcdefghijklmnopqrstuvwxy") + +func KeyValue_Generate(rnd *rand.Rand, n, minlen, maxlen, vminlen, vmaxlen int) *KeyValue { + if rnd == nil { + rnd = NewRand() + } + if maxlen < minlen { + panic("max len should >= min len") + } + + rrand := func(min, max int) int { + if min == max { + return max + } + return rnd.Intn(max-min) + min + } + + kv := &KeyValue{} + endC := byte(len(keymap) - 1) + gen := make([]byte, 0, maxlen) + for i := 0; i < n; i++ { + m := rrand(minlen, maxlen) + last := gen + retry: + gen = last[:m] + if k := len(last); m > k { + for j := k; j < m; j++ { + gen[j] = 0 + } + } else { + for j := m - 1; j >= 0; j-- { + c := last[j] + if c == endC { + continue + } + gen[j] = c + 1 + for j += 1; j < m; j++ { + gen[j] = 0 + } + goto ok + } + if m < maxlen { + m++ + goto retry + } + panic(fmt.Sprintf("only able to generate %d keys out of %d keys, try increasing max len", kv.Len(), n)) + ok: + } + key := make([]byte, m) + for j := 0; j < m; j++ { + key[j] = keymap[gen[j]] + } + value := make([]byte, rrand(vminlen, vmaxlen)) + for n := copy(value, []byte(fmt.Sprintf("v%d", i))); n < len(value); n++ { + value[n] = 'x' + } + kv.Put(key, value) + } + return kv +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/kvtest.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/kvtest.go new file mode 100644 index 000000000..4fc75b6f2 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/kvtest.go @@ -0,0 +1,136 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package testutil + +import ( + "fmt" + "math/rand" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + + "github.com/syndtr/goleveldb/leveldb/util" +) + +func KeyValueTesting(rnd *rand.Rand, p DB, kv KeyValue) { + if rnd == nil { + rnd = NewRand() + } + + if db, ok := p.(Find); ok { + It("Should find all keys with Find", func() { + ShuffledIndex(nil, kv.Len(), 1, func(i int) { + key_, key, value := kv.IndexInexact(i) + + // Using exact key. + rkey, rvalue, err := db.TestFind(key) + Expect(err).ShouldNot(HaveOccurred(), "Error for key %q", key) + Expect(rkey).Should(Equal(key), "Key") + Expect(rvalue).Should(Equal(value), "Value for key %q", key) + + // Using inexact key. + rkey, rvalue, err = db.TestFind(key_) + Expect(err).ShouldNot(HaveOccurred(), "Error for key %q (%q)", key_, key) + Expect(rkey).Should(Equal(key)) + Expect(rvalue).Should(Equal(value), "Value for key %q (%q)", key_, key) + }) + }) + + It("Should return error if the key is not present", func() { + var key []byte + if kv.Len() > 0 { + key_, _ := kv.Index(kv.Len() - 1) + key = BytesAfter(key_) + } + rkey, _, err := db.TestFind(key) + Expect(err).Should(HaveOccurred(), "Find for key %q yield key %q", key, rkey) + Expect(err).Should(Equal(util.ErrNotFound)) + }) + } + + if db, ok := p.(Get); ok { + It("Should only find exact key with Get", func() { + ShuffledIndex(nil, kv.Len(), 1, func(i int) { + key_, key, value := kv.IndexInexact(i) + + // Using exact key. + rvalue, err := db.TestGet(key) + Expect(err).ShouldNot(HaveOccurred(), "Error for key %q", key) + Expect(rvalue).Should(Equal(value), "Value for key %q", key) + + // Using inexact key. + if len(key_) > 0 { + _, err = db.TestGet(key_) + Expect(err).Should(HaveOccurred(), "Error for key %q", key_) + Expect(err).Should(Equal(util.ErrNotFound)) + } + }) + }) + } + + if db, ok := p.(NewIterator); ok { + TestIter := func(r *util.Range, _kv KeyValue) { + iter := db.TestNewIterator(r) + Expect(iter.Error()).ShouldNot(HaveOccurred()) + + t := IteratorTesting{ + KeyValue: _kv, + Iter: iter, + } + + DoIteratorTesting(&t) + } + + It("Should iterates and seeks correctly", func(done Done) { + TestIter(nil, kv.Clone()) + done <- true + }, 3.0) + + RandomIndex(rnd, kv.Len(), kv.Len(), func(i int) { + type slice struct { + r *util.Range + start, limit int + } + + key_, _, _ := kv.IndexInexact(i) + for _, x := range []slice{ + {&util.Range{Start: key_, Limit: nil}, i, kv.Len()}, + {&util.Range{Start: nil, Limit: key_}, 0, i}, + } { + It(fmt.Sprintf("Should iterates and seeks correctly of a slice %d .. %d", x.start, x.limit), func(done Done) { + TestIter(x.r, kv.Slice(x.start, x.limit)) + done <- true + }, 3.0) + } + }) + + RandomRange(rnd, kv.Len(), kv.Len(), func(start, limit int) { + It(fmt.Sprintf("Should iterates and seeks correctly of a slice %d .. %d", start, limit), func(done Done) { + r := kv.Range(start, limit) + TestIter(&r, kv.Slice(start, limit)) + done <- true + }, 3.0) + }) + } +} + +func AllKeyValueTesting(rnd *rand.Rand, body func(kv KeyValue) DB) { + Test := func(kv *KeyValue) func() { + return func() { + db := body(*kv) + KeyValueTesting(rnd, db, *kv) + } + } + + Describe("with no key/value (empty)", Test(&KeyValue{})) + Describe("with empty key", Test(KeyValue_EmptyKey())) + Describe("with empty value", Test(KeyValue_EmptyValue())) + Describe("with one key/value", Test(KeyValue_OneKeyValue())) + Describe("with big value", Test(KeyValue_BigValue())) + Describe("with special key", Test(KeyValue_SpecialKey())) + Describe("with multiple key/value", Test(KeyValue_MultipleKeyValue())) +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/storage.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/storage.go new file mode 100644 index 000000000..0f8d77a73 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/storage.go @@ -0,0 +1,585 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package testutil + +import ( + "bytes" + "fmt" + "io" + "os" + "path/filepath" + "runtime" + "strings" + "sync" + + . "github.com/onsi/gomega" + + "github.com/syndtr/goleveldb/leveldb/storage" + "github.com/syndtr/goleveldb/leveldb/util" +) + +var ( + storageMu sync.Mutex + storageUseFS bool = true + storageKeepFS bool = false + storageNum int +) + +type StorageMode int + +const ( + ModeOpen StorageMode = 1 << iota + ModeCreate + ModeRemove + ModeRead + ModeWrite + ModeSync + ModeClose +) + +const ( + modeOpen = iota + modeCreate + modeRemove + modeRead + modeWrite + modeSync + modeClose + + modeCount +) + +const ( + typeManifest = iota + typeJournal + typeTable + typeTemp + + typeCount +) + +const flattenCount = modeCount * typeCount + +func flattenType(m StorageMode, t storage.FileType) int { + var x int + switch m { + case ModeOpen: + x = modeOpen + case ModeCreate: + x = modeCreate + case ModeRemove: + x = modeRemove + case ModeRead: + x = modeRead + case ModeWrite: + x = modeWrite + case ModeSync: + x = modeSync + case ModeClose: + x = modeClose + default: + panic("invalid storage mode") + } + x *= typeCount + switch t { + case storage.TypeManifest: + return x + typeManifest + case storage.TypeJournal: + return x + typeJournal + case storage.TypeTable: + return x + typeTable + case storage.TypeTemp: + return x + typeTemp + default: + panic("invalid file type") + } +} + +func listFlattenType(m StorageMode, t storage.FileType) []int { + ret := make([]int, 0, flattenCount) + add := func(x int) { + x *= typeCount + switch { + case t&storage.TypeManifest != 0: + ret = append(ret, x+typeManifest) + case t&storage.TypeJournal != 0: + ret = append(ret, x+typeJournal) + case t&storage.TypeTable != 0: + ret = append(ret, x+typeTable) + case t&storage.TypeTemp != 0: + ret = append(ret, x+typeTemp) + } + } + switch { + case m&ModeOpen != 0: + add(modeOpen) + case m&ModeCreate != 0: + add(modeCreate) + case m&ModeRemove != 0: + add(modeRemove) + case m&ModeRead != 0: + add(modeRead) + case m&ModeWrite != 0: + add(modeWrite) + case m&ModeSync != 0: + add(modeSync) + case m&ModeClose != 0: + add(modeClose) + } + return ret +} + +func packFile(num uint64, t storage.FileType) uint64 { + if num>>(64-typeCount) != 0 { + panic("overflow") + } + return num<<typeCount | uint64(t) +} + +func unpackFile(x uint64) (uint64, storage.FileType) { + return x >> typeCount, storage.FileType(x) & storage.TypeAll +} + +type emulatedError struct { + err error +} + +func (err emulatedError) Error() string { + return fmt.Sprintf("emulated storage error: %v", err.err) +} + +type storageLock struct { + s *Storage + r util.Releaser +} + +func (l storageLock) Release() { + l.r.Release() + l.s.logI("storage lock released") +} + +type reader struct { + f *file + storage.Reader +} + +func (r *reader) Read(p []byte) (n int, err error) { + err = r.f.s.emulateError(ModeRead, r.f.Type()) + if err == nil { + r.f.s.stall(ModeRead, r.f.Type()) + n, err = r.Reader.Read(p) + } + r.f.s.count(ModeRead, r.f.Type(), n) + if err != nil && err != io.EOF { + r.f.s.logI("read error, num=%d type=%v n=%d err=%v", r.f.Num(), r.f.Type(), n, err) + } + return +} + +func (r *reader) ReadAt(p []byte, off int64) (n int, err error) { + err = r.f.s.emulateError(ModeRead, r.f.Type()) + if err == nil { + r.f.s.stall(ModeRead, r.f.Type()) + n, err = r.Reader.ReadAt(p, off) + } + r.f.s.count(ModeRead, r.f.Type(), n) + if err != nil && err != io.EOF { + r.f.s.logI("readAt error, num=%d type=%v offset=%d n=%d err=%v", r.f.Num(), r.f.Type(), off, n, err) + } + return +} + +func (r *reader) Close() (err error) { + return r.f.doClose(r.Reader) +} + +type writer struct { + f *file + storage.Writer +} + +func (w *writer) Write(p []byte) (n int, err error) { + err = w.f.s.emulateError(ModeWrite, w.f.Type()) + if err == nil { + w.f.s.stall(ModeWrite, w.f.Type()) + n, err = w.Writer.Write(p) + } + w.f.s.count(ModeWrite, w.f.Type(), n) + if err != nil && err != io.EOF { + w.f.s.logI("write error, num=%d type=%v n=%d err=%v", w.f.Num(), w.f.Type(), n, err) + } + return +} + +func (w *writer) Sync() (err error) { + err = w.f.s.emulateError(ModeSync, w.f.Type()) + if err == nil { + w.f.s.stall(ModeSync, w.f.Type()) + err = w.Writer.Sync() + } + w.f.s.count(ModeSync, w.f.Type(), 0) + if err != nil { + w.f.s.logI("sync error, num=%d type=%v err=%v", w.f.Num(), w.f.Type(), err) + } + return +} + +func (w *writer) Close() (err error) { + return w.f.doClose(w.Writer) +} + +type file struct { + s *Storage + storage.File +} + +func (f *file) pack() uint64 { + return packFile(f.Num(), f.Type()) +} + +func (f *file) assertOpen() { + ExpectWithOffset(2, f.s.opens).NotTo(HaveKey(f.pack()), "File open, num=%d type=%v writer=%v", f.Num(), f.Type(), f.s.opens[f.pack()]) +} + +func (f *file) doClose(closer io.Closer) (err error) { + err = f.s.emulateError(ModeClose, f.Type()) + if err == nil { + f.s.stall(ModeClose, f.Type()) + } + f.s.mu.Lock() + defer f.s.mu.Unlock() + if err == nil { + ExpectWithOffset(2, f.s.opens).To(HaveKey(f.pack()), "File closed, num=%d type=%v", f.Num(), f.Type()) + err = closer.Close() + } + f.s.countNB(ModeClose, f.Type(), 0) + writer := f.s.opens[f.pack()] + if err != nil { + f.s.logISkip(1, "file close failed, num=%d type=%v writer=%v err=%v", f.Num(), f.Type(), writer, err) + } else { + f.s.logISkip(1, "file closed, num=%d type=%v writer=%v", f.Num(), f.Type(), writer) + delete(f.s.opens, f.pack()) + } + return +} + +func (f *file) Open() (r storage.Reader, err error) { + err = f.s.emulateError(ModeOpen, f.Type()) + if err == nil { + f.s.stall(ModeOpen, f.Type()) + } + f.s.mu.Lock() + defer f.s.mu.Unlock() + if err == nil { + f.assertOpen() + f.s.countNB(ModeOpen, f.Type(), 0) + r, err = f.File.Open() + } + if err != nil { + f.s.logI("file open failed, num=%d type=%v err=%v", f.Num(), f.Type(), err) + } else { + f.s.logI("file opened, num=%d type=%v", f.Num(), f.Type()) + f.s.opens[f.pack()] = false + r = &reader{f, r} + } + return +} + +func (f *file) Create() (w storage.Writer, err error) { + err = f.s.emulateError(ModeCreate, f.Type()) + if err == nil { + f.s.stall(ModeCreate, f.Type()) + } + f.s.mu.Lock() + defer f.s.mu.Unlock() + if err == nil { + f.assertOpen() + f.s.countNB(ModeCreate, f.Type(), 0) + w, err = f.File.Create() + } + if err != nil { + f.s.logI("file create failed, num=%d type=%v err=%v", f.Num(), f.Type(), err) + } else { + f.s.logI("file created, num=%d type=%v", f.Num(), f.Type()) + f.s.opens[f.pack()] = true + w = &writer{f, w} + } + return +} + +func (f *file) Remove() (err error) { + err = f.s.emulateError(ModeRemove, f.Type()) + if err == nil { + f.s.stall(ModeRemove, f.Type()) + } + f.s.mu.Lock() + defer f.s.mu.Unlock() + if err == nil { + f.assertOpen() + f.s.countNB(ModeRemove, f.Type(), 0) + err = f.File.Remove() + } + if err != nil { + f.s.logI("file remove failed, num=%d type=%v err=%v", f.Num(), f.Type(), err) + } else { + f.s.logI("file removed, num=%d type=%v", f.Num(), f.Type()) + } + return +} + +type Storage struct { + storage.Storage + closeFn func() error + + lmu sync.Mutex + lb bytes.Buffer + + mu sync.Mutex + // Open files, true=writer, false=reader + opens map[uint64]bool + counters [flattenCount]int + bytesCounter [flattenCount]int64 + emulatedError [flattenCount]error + stallCond sync.Cond + stalled [flattenCount]bool +} + +func (s *Storage) log(skip int, str string) { + s.lmu.Lock() + defer s.lmu.Unlock() + _, file, line, ok := runtime.Caller(skip + 2) + if ok { + // Truncate file name at last file name separator. + if index := strings.LastIndex(file, "/"); index >= 0 { + file = file[index+1:] + } else if index = strings.LastIndex(file, "\\"); index >= 0 { + file = file[index+1:] + } + } else { + file = "???" + line = 1 + } + fmt.Fprintf(&s.lb, "%s:%d: ", file, line) + lines := strings.Split(str, "\n") + if l := len(lines); l > 1 && lines[l-1] == "" { + lines = lines[:l-1] + } + for i, line := range lines { + if i > 0 { + s.lb.WriteString("\n\t") + } + s.lb.WriteString(line) + } + s.lb.WriteByte('\n') +} + +func (s *Storage) logISkip(skip int, format string, args ...interface{}) { + pc, _, _, ok := runtime.Caller(skip + 1) + if ok { + if f := runtime.FuncForPC(pc); f != nil { + fname := f.Name() + if index := strings.LastIndex(fname, "."); index >= 0 { + fname = fname[index+1:] + } + format = fname + ": " + format + } + } + s.log(skip+1, fmt.Sprintf(format, args...)) +} + +func (s *Storage) logI(format string, args ...interface{}) { + s.logISkip(1, format, args...) +} + +func (s *Storage) Log(str string) { + s.log(1, "Log: "+str) +} + +func (s *Storage) Lock() (r util.Releaser, err error) { + r, err = s.Storage.Lock() + if err != nil { + s.logI("storage locking failed, err=%v", err) + } else { + s.logI("storage locked") + r = storageLock{s, r} + } + return +} + +func (s *Storage) GetFile(num uint64, t storage.FileType) storage.File { + return &file{s, s.Storage.GetFile(num, t)} +} + +func (s *Storage) GetFiles(t storage.FileType) (files []storage.File, err error) { + rfiles, err := s.Storage.GetFiles(t) + if err != nil { + s.logI("get files failed, err=%v", err) + return + } + files = make([]storage.File, len(rfiles)) + for i, f := range rfiles { + files[i] = &file{s, f} + } + s.logI("get files, type=0x%x count=%d", int(t), len(files)) + return +} + +func (s *Storage) GetManifest() (f storage.File, err error) { + manifest, err := s.Storage.GetManifest() + if err != nil { + if !os.IsNotExist(err) { + s.logI("get manifest failed, err=%v", err) + } + return + } + s.logI("get manifest, num=%d", manifest.Num()) + return &file{s, manifest}, nil +} + +func (s *Storage) SetManifest(f storage.File) error { + f_, ok := f.(*file) + ExpectWithOffset(1, ok).To(BeTrue()) + ExpectWithOffset(1, f_.Type()).To(Equal(storage.TypeManifest)) + err := s.Storage.SetManifest(f_.File) + if err != nil { + s.logI("set manifest failed, err=%v", err) + } else { + s.logI("set manifest, num=%d", f_.Num()) + } + return err +} + +func (s *Storage) openFiles() string { + out := "Open files:" + for x, writer := range s.opens { + num, t := unpackFile(x) + out += fmt.Sprintf("\n · num=%d type=%v writer=%v", num, t, writer) + } + return out +} + +func (s *Storage) Close() error { + s.mu.Lock() + defer s.mu.Unlock() + ExpectWithOffset(1, s.opens).To(BeEmpty(), s.openFiles()) + err := s.Storage.Close() + if err != nil { + s.logI("storage closing failed, err=%v", err) + } else { + s.logI("storage closed") + } + if s.closeFn != nil { + if err1 := s.closeFn(); err1 != nil { + s.logI("close func error, err=%v", err1) + } + } + return err +} + +func (s *Storage) countNB(m StorageMode, t storage.FileType, n int) { + s.counters[flattenType(m, t)]++ + s.bytesCounter[flattenType(m, t)] += int64(n) +} + +func (s *Storage) count(m StorageMode, t storage.FileType, n int) { + s.mu.Lock() + defer s.mu.Unlock() + s.countNB(m, t, n) +} + +func (s *Storage) ResetCounter(m StorageMode, t storage.FileType) { + for _, x := range listFlattenType(m, t) { + s.counters[x] = 0 + s.bytesCounter[x] = 0 + } +} + +func (s *Storage) Counter(m StorageMode, t storage.FileType) (count int, bytes int64) { + for _, x := range listFlattenType(m, t) { + count += s.counters[x] + bytes += s.bytesCounter[x] + } + return +} + +func (s *Storage) emulateError(m StorageMode, t storage.FileType) error { + s.mu.Lock() + defer s.mu.Unlock() + err := s.emulatedError[flattenType(m, t)] + if err != nil { + return emulatedError{err} + } + return nil +} + +func (s *Storage) EmulateError(m StorageMode, t storage.FileType, err error) { + s.mu.Lock() + defer s.mu.Unlock() + for _, x := range listFlattenType(m, t) { + s.emulatedError[x] = err + } +} + +func (s *Storage) stall(m StorageMode, t storage.FileType) { + x := flattenType(m, t) + s.mu.Lock() + defer s.mu.Unlock() + for s.stalled[x] { + s.stallCond.Wait() + } +} + +func (s *Storage) Stall(m StorageMode, t storage.FileType) { + s.mu.Lock() + defer s.mu.Unlock() + for _, x := range listFlattenType(m, t) { + s.stalled[x] = true + } +} + +func (s *Storage) Release(m StorageMode, t storage.FileType) { + s.mu.Lock() + defer s.mu.Unlock() + for _, x := range listFlattenType(m, t) { + s.stalled[x] = false + } + s.stallCond.Broadcast() +} + +func NewStorage() *Storage { + var stor storage.Storage + var closeFn func() error + if storageUseFS { + for { + storageMu.Lock() + num := storageNum + storageNum++ + storageMu.Unlock() + path := filepath.Join(os.TempDir(), fmt.Sprintf("goleveldb-test%d0%d0%d", os.Getuid(), os.Getpid(), num)) + if _, err := os.Stat(path); os.IsNotExist(err) { + stor, err = storage.OpenFile(path) + ExpectWithOffset(1, err).NotTo(HaveOccurred(), "creating storage at %s", path) + closeFn = func() error { + if storageKeepFS { + return nil + } + return os.RemoveAll(path) + } + break + } + } + } else { + stor = storage.NewMemStorage() + } + s := &Storage{ + Storage: stor, + closeFn: closeFn, + opens: make(map[uint64]bool), + } + s.stallCond.L = &s.mu + return s +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/util.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/util.go new file mode 100644 index 000000000..38fe25d52 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil/util.go @@ -0,0 +1,157 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package testutil + +import ( + "bytes" + "flag" + "math/rand" + "reflect" + "sync" + + "github.com/onsi/ginkgo/config" + + "github.com/syndtr/goleveldb/leveldb/comparer" +) + +var ( + runfn = make(map[string][]func()) + runmu sync.Mutex +) + +func Defer(args ...interface{}) bool { + var ( + group string + fn func() + ) + for _, arg := range args { + v := reflect.ValueOf(arg) + switch v.Kind() { + case reflect.String: + group = v.String() + case reflect.Func: + r := reflect.ValueOf(&fn).Elem() + r.Set(v) + } + } + if fn != nil { + runmu.Lock() + runfn[group] = append(runfn[group], fn) + runmu.Unlock() + } + return true +} + +func RunDefer(groups ...string) bool { + if len(groups) == 0 { + groups = append(groups, "") + } + runmu.Lock() + var runfn_ []func() + for _, group := range groups { + runfn_ = append(runfn_, runfn[group]...) + delete(runfn, group) + } + runmu.Unlock() + for _, fn := range runfn_ { + fn() + } + return runfn_ != nil +} + +func RandomSeed() int64 { + if !flag.Parsed() { + panic("random seed not initialized") + } + return config.GinkgoConfig.RandomSeed +} + +func NewRand() *rand.Rand { + return rand.New(rand.NewSource(RandomSeed())) +} + +var cmp = comparer.DefaultComparer + +func BytesSeparator(a, b []byte) []byte { + if bytes.Equal(a, b) { + return b + } + i, n := 0, len(a) + if n > len(b) { + n = len(b) + } + for ; i < n && (a[i] == b[i]); i++ { + } + x := append([]byte{}, a[:i]...) + if i < n { + if c := a[i] + 1; c < b[i] { + return append(x, c) + } + x = append(x, a[i]) + i++ + } + for ; i < len(a); i++ { + if c := a[i]; c < 0xff { + return append(x, c+1) + } else { + x = append(x, c) + } + } + if len(b) > i && b[i] > 0 { + return append(x, b[i]-1) + } + return append(x, 'x') +} + +func BytesAfter(b []byte) []byte { + var x []byte + for _, c := range b { + if c < 0xff { + return append(x, c+1) + } else { + x = append(x, c) + } + } + return append(x, 'x') +} + +func RandomIndex(rnd *rand.Rand, n, round int, fn func(i int)) { + if rnd == nil { + rnd = NewRand() + } + for x := 0; x < round; x++ { + fn(rnd.Intn(n)) + } + return +} + +func ShuffledIndex(rnd *rand.Rand, n, round int, fn func(i int)) { + if rnd == nil { + rnd = NewRand() + } + for x := 0; x < round; x++ { + for _, i := range rnd.Perm(n) { + fn(i) + } + } + return +} + +func RandomRange(rnd *rand.Rand, n, round int, fn func(start, limit int)) { + if rnd == nil { + rnd = NewRand() + } + for x := 0; x < round; x++ { + start := rnd.Intn(n) + length := 0 + if j := n - start; j > 0 { + length = rnd.Intn(j) + } + fn(start, start+length) + } + return +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil_test.go new file mode 100644 index 000000000..c1402fda3 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/testutil_test.go @@ -0,0 +1,58 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + . "github.com/onsi/gomega" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/testutil" + "github.com/syndtr/goleveldb/leveldb/util" +) + +type testingDB struct { + *DB + ro *opt.ReadOptions + wo *opt.WriteOptions + stor *testutil.Storage +} + +func (t *testingDB) TestPut(key []byte, value []byte) error { + return t.Put(key, value, t.wo) +} + +func (t *testingDB) TestDelete(key []byte) error { + return t.Delete(key, t.wo) +} + +func (t *testingDB) TestGet(key []byte) (value []byte, err error) { + return t.Get(key, t.ro) +} + +func (t *testingDB) TestNewIterator(slice *util.Range) iterator.Iterator { + return t.NewIterator(slice, t.ro) +} + +func (t *testingDB) TestClose() { + err := t.Close() + ExpectWithOffset(1, err).NotTo(HaveOccurred()) + err = t.stor.Close() + ExpectWithOffset(1, err).NotTo(HaveOccurred()) +} + +func newTestingDB(o *opt.Options, ro *opt.ReadOptions, wo *opt.WriteOptions) *testingDB { + stor := testutil.NewStorage() + db, err := Open(stor, o) + Expect(err).NotTo(HaveOccurred()) + return &testingDB{ + DB: db, + ro: ro, + wo: wo, + stor: stor, + } +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util.go new file mode 100644 index 000000000..a43d2e460 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util.go @@ -0,0 +1,91 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "fmt" + "sort" + + "github.com/syndtr/goleveldb/leveldb/storage" +) + +func shorten(str string) string { + if len(str) <= 4 { + return str + } + return str[:1] + ".." + str[len(str)-1:] +} + +var bunits = [...]string{"", "Ki", "Mi", "Gi"} + +func shortenb(bytes int) string { + i := 0 + for ; bytes > 1024 && i < 4; i++ { + bytes /= 1024 + } + return fmt.Sprintf("%d%sB", bytes, bunits[i]) +} + +func sshortenb(bytes int) string { + if bytes == 0 { + return "~" + } + sign := "+" + if bytes < 0 { + sign = "-" + bytes *= -1 + } + i := 0 + for ; bytes > 1024 && i < 4; i++ { + bytes /= 1024 + } + return fmt.Sprintf("%s%d%sB", sign, bytes, bunits[i]) +} + +func sint(x int) string { + if x == 0 { + return "~" + } + sign := "+" + if x < 0 { + sign = "-" + x *= -1 + } + return fmt.Sprintf("%s%d", sign, x) +} + +func minInt(a, b int) int { + if a < b { + return a + } + return b +} + +func maxInt(a, b int) int { + if a > b { + return a + } + return b +} + +type files []storage.File + +func (p files) Len() int { + return len(p) +} + +func (p files) Less(i, j int) bool { + return p[i].Num() < p[j].Num() +} + +func (p files) Swap(i, j int) { + p[i], p[j] = p[j], p[i] +} + +func (p files) sort() { + sort.Sort(p) +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/buffer.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/buffer.go new file mode 100644 index 000000000..21de24255 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/buffer.go @@ -0,0 +1,293 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package util + +// This a copy of Go std bytes.Buffer with some modification +// and some features stripped. + +import ( + "bytes" + "io" +) + +// A Buffer is a variable-sized buffer of bytes with Read and Write methods. +// The zero value for Buffer is an empty buffer ready to use. +type Buffer struct { + buf []byte // contents are the bytes buf[off : len(buf)] + off int // read at &buf[off], write at &buf[len(buf)] + bootstrap [64]byte // memory to hold first slice; helps small buffers (Printf) avoid allocation. +} + +// Bytes returns a slice of the contents of the unread portion of the buffer; +// len(b.Bytes()) == b.Len(). If the caller changes the contents of the +// returned slice, the contents of the buffer will change provided there +// are no intervening method calls on the Buffer. +func (b *Buffer) Bytes() []byte { return b.buf[b.off:] } + +// String returns the contents of the unread portion of the buffer +// as a string. If the Buffer is a nil pointer, it returns "<nil>". +func (b *Buffer) String() string { + if b == nil { + // Special case, useful in debugging. + return "<nil>" + } + return string(b.buf[b.off:]) +} + +// Len returns the number of bytes of the unread portion of the buffer; +// b.Len() == len(b.Bytes()). +func (b *Buffer) Len() int { return len(b.buf) - b.off } + +// Truncate discards all but the first n unread bytes from the buffer. +// It panics if n is negative or greater than the length of the buffer. +func (b *Buffer) Truncate(n int) { + switch { + case n < 0 || n > b.Len(): + panic("leveldb/util.Buffer: truncation out of range") + case n == 0: + // Reuse buffer space. + b.off = 0 + } + b.buf = b.buf[0 : b.off+n] +} + +// Reset resets the buffer so it has no content. +// b.Reset() is the same as b.Truncate(0). +func (b *Buffer) Reset() { b.Truncate(0) } + +// grow grows the buffer to guarantee space for n more bytes. +// It returns the index where bytes should be written. +// If the buffer can't grow it will panic with bytes.ErrTooLarge. +func (b *Buffer) grow(n int) int { + m := b.Len() + // If buffer is empty, reset to recover space. + if m == 0 && b.off != 0 { + b.Truncate(0) + } + if len(b.buf)+n > cap(b.buf) { + var buf []byte + if b.buf == nil && n <= len(b.bootstrap) { + buf = b.bootstrap[0:] + } else if m+n <= cap(b.buf)/2 { + // We can slide things down instead of allocating a new + // slice. We only need m+n <= cap(b.buf) to slide, but + // we instead let capacity get twice as large so we + // don't spend all our time copying. + copy(b.buf[:], b.buf[b.off:]) + buf = b.buf[:m] + } else { + // not enough space anywhere + buf = makeSlice(2*cap(b.buf) + n) + copy(buf, b.buf[b.off:]) + } + b.buf = buf + b.off = 0 + } + b.buf = b.buf[0 : b.off+m+n] + return b.off + m +} + +// Alloc allocs n bytes of slice from the buffer, growing the buffer as +// needed. If n is negative, Alloc will panic. +// If the buffer can't grow it will panic with bytes.ErrTooLarge. +func (b *Buffer) Alloc(n int) []byte { + if n < 0 { + panic("leveldb/util.Buffer.Alloc: negative count") + } + m := b.grow(n) + return b.buf[m:] +} + +// Grow grows the buffer's capacity, if necessary, to guarantee space for +// another n bytes. After Grow(n), at least n bytes can be written to the +// buffer without another allocation. +// If n is negative, Grow will panic. +// If the buffer can't grow it will panic with bytes.ErrTooLarge. +func (b *Buffer) Grow(n int) { + if n < 0 { + panic("leveldb/util.Buffer.Grow: negative count") + } + m := b.grow(n) + b.buf = b.buf[0:m] +} + +// Write appends the contents of p to the buffer, growing the buffer as +// needed. The return value n is the length of p; err is always nil. If the +// buffer becomes too large, Write will panic with bytes.ErrTooLarge. +func (b *Buffer) Write(p []byte) (n int, err error) { + m := b.grow(len(p)) + return copy(b.buf[m:], p), nil +} + +// MinRead is the minimum slice size passed to a Read call by +// Buffer.ReadFrom. As long as the Buffer has at least MinRead bytes beyond +// what is required to hold the contents of r, ReadFrom will not grow the +// underlying buffer. +const MinRead = 512 + +// ReadFrom reads data from r until EOF and appends it to the buffer, growing +// the buffer as needed. The return value n is the number of bytes read. Any +// error except io.EOF encountered during the read is also returned. If the +// buffer becomes too large, ReadFrom will panic with bytes.ErrTooLarge. +func (b *Buffer) ReadFrom(r io.Reader) (n int64, err error) { + // If buffer is empty, reset to recover space. + if b.off >= len(b.buf) { + b.Truncate(0) + } + for { + if free := cap(b.buf) - len(b.buf); free < MinRead { + // not enough space at end + newBuf := b.buf + if b.off+free < MinRead { + // not enough space using beginning of buffer; + // double buffer capacity + newBuf = makeSlice(2*cap(b.buf) + MinRead) + } + copy(newBuf, b.buf[b.off:]) + b.buf = newBuf[:len(b.buf)-b.off] + b.off = 0 + } + m, e := r.Read(b.buf[len(b.buf):cap(b.buf)]) + b.buf = b.buf[0 : len(b.buf)+m] + n += int64(m) + if e == io.EOF { + break + } + if e != nil { + return n, e + } + } + return n, nil // err is EOF, so return nil explicitly +} + +// makeSlice allocates a slice of size n. If the allocation fails, it panics +// with bytes.ErrTooLarge. +func makeSlice(n int) []byte { + // If the make fails, give a known error. + defer func() { + if recover() != nil { + panic(bytes.ErrTooLarge) + } + }() + return make([]byte, n) +} + +// WriteTo writes data to w until the buffer is drained or an error occurs. +// The return value n is the number of bytes written; it always fits into an +// int, but it is int64 to match the io.WriterTo interface. Any error +// encountered during the write is also returned. +func (b *Buffer) WriteTo(w io.Writer) (n int64, err error) { + if b.off < len(b.buf) { + nBytes := b.Len() + m, e := w.Write(b.buf[b.off:]) + if m > nBytes { + panic("leveldb/util.Buffer.WriteTo: invalid Write count") + } + b.off += m + n = int64(m) + if e != nil { + return n, e + } + // all bytes should have been written, by definition of + // Write method in io.Writer + if m != nBytes { + return n, io.ErrShortWrite + } + } + // Buffer is now empty; reset. + b.Truncate(0) + return +} + +// WriteByte appends the byte c to the buffer, growing the buffer as needed. +// The returned error is always nil, but is included to match bufio.Writer's +// WriteByte. If the buffer becomes too large, WriteByte will panic with +// bytes.ErrTooLarge. +func (b *Buffer) WriteByte(c byte) error { + m := b.grow(1) + b.buf[m] = c + return nil +} + +// Read reads the next len(p) bytes from the buffer or until the buffer +// is drained. The return value n is the number of bytes read. If the +// buffer has no data to return, err is io.EOF (unless len(p) is zero); +// otherwise it is nil. +func (b *Buffer) Read(p []byte) (n int, err error) { + if b.off >= len(b.buf) { + // Buffer is empty, reset to recover space. + b.Truncate(0) + if len(p) == 0 { + return + } + return 0, io.EOF + } + n = copy(p, b.buf[b.off:]) + b.off += n + return +} + +// Next returns a slice containing the next n bytes from the buffer, +// advancing the buffer as if the bytes had been returned by Read. +// If there are fewer than n bytes in the buffer, Next returns the entire buffer. +// The slice is only valid until the next call to a read or write method. +func (b *Buffer) Next(n int) []byte { + m := b.Len() + if n > m { + n = m + } + data := b.buf[b.off : b.off+n] + b.off += n + return data +} + +// ReadByte reads and returns the next byte from the buffer. +// If no byte is available, it returns error io.EOF. +func (b *Buffer) ReadByte() (c byte, err error) { + if b.off >= len(b.buf) { + // Buffer is empty, reset to recover space. + b.Truncate(0) + return 0, io.EOF + } + c = b.buf[b.off] + b.off++ + return c, nil +} + +// ReadBytes reads until the first occurrence of delim in the input, +// returning a slice containing the data up to and including the delimiter. +// If ReadBytes encounters an error before finding a delimiter, +// it returns the data read before the error and the error itself (often io.EOF). +// ReadBytes returns err != nil if and only if the returned data does not end in +// delim. +func (b *Buffer) ReadBytes(delim byte) (line []byte, err error) { + slice, err := b.readSlice(delim) + // return a copy of slice. The buffer's backing array may + // be overwritten by later calls. + line = append(line, slice...) + return +} + +// readSlice is like ReadBytes but returns a reference to internal buffer data. +func (b *Buffer) readSlice(delim byte) (line []byte, err error) { + i := bytes.IndexByte(b.buf[b.off:], delim) + end := b.off + i + 1 + if i < 0 { + end = len(b.buf) + err = io.EOF + } + line = b.buf[b.off:end] + b.off = end + return line, err +} + +// NewBuffer creates and initializes a new Buffer using buf as its initial +// contents. It is intended to prepare a Buffer to read existing data. It +// can also be used to size the internal buffer for writing. To do that, +// buf should have the desired capacity but a length of zero. +// +// In most cases, new(Buffer) (or just declaring a Buffer variable) is +// sufficient to initialize a Buffer. +func NewBuffer(buf []byte) *Buffer { return &Buffer{buf: buf} } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/buffer_test.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/buffer_test.go new file mode 100644 index 000000000..87d96739c --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/buffer_test.go @@ -0,0 +1,369 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package util + +import ( + "bytes" + "io" + "math/rand" + "runtime" + "testing" +) + +const N = 10000 // make this bigger for a larger (and slower) test +var data string // test data for write tests +var testBytes []byte // test data; same as data but as a slice. + +func init() { + testBytes = make([]byte, N) + for i := 0; i < N; i++ { + testBytes[i] = 'a' + byte(i%26) + } + data = string(testBytes) +} + +// Verify that contents of buf match the string s. +func check(t *testing.T, testname string, buf *Buffer, s string) { + bytes := buf.Bytes() + str := buf.String() + if buf.Len() != len(bytes) { + t.Errorf("%s: buf.Len() == %d, len(buf.Bytes()) == %d", testname, buf.Len(), len(bytes)) + } + + if buf.Len() != len(str) { + t.Errorf("%s: buf.Len() == %d, len(buf.String()) == %d", testname, buf.Len(), len(str)) + } + + if buf.Len() != len(s) { + t.Errorf("%s: buf.Len() == %d, len(s) == %d", testname, buf.Len(), len(s)) + } + + if string(bytes) != s { + t.Errorf("%s: string(buf.Bytes()) == %q, s == %q", testname, string(bytes), s) + } +} + +// Fill buf through n writes of byte slice fub. +// The initial contents of buf corresponds to the string s; +// the result is the final contents of buf returned as a string. +func fillBytes(t *testing.T, testname string, buf *Buffer, s string, n int, fub []byte) string { + check(t, testname+" (fill 1)", buf, s) + for ; n > 0; n-- { + m, err := buf.Write(fub) + if m != len(fub) { + t.Errorf(testname+" (fill 2): m == %d, expected %d", m, len(fub)) + } + if err != nil { + t.Errorf(testname+" (fill 3): err should always be nil, found err == %s", err) + } + s += string(fub) + check(t, testname+" (fill 4)", buf, s) + } + return s +} + +func TestNewBuffer(t *testing.T) { + buf := NewBuffer(testBytes) + check(t, "NewBuffer", buf, data) +} + +// Empty buf through repeated reads into fub. +// The initial contents of buf corresponds to the string s. +func empty(t *testing.T, testname string, buf *Buffer, s string, fub []byte) { + check(t, testname+" (empty 1)", buf, s) + + for { + n, err := buf.Read(fub) + if n == 0 { + break + } + if err != nil { + t.Errorf(testname+" (empty 2): err should always be nil, found err == %s", err) + } + s = s[n:] + check(t, testname+" (empty 3)", buf, s) + } + + check(t, testname+" (empty 4)", buf, "") +} + +func TestBasicOperations(t *testing.T) { + var buf Buffer + + for i := 0; i < 5; i++ { + check(t, "TestBasicOperations (1)", &buf, "") + + buf.Reset() + check(t, "TestBasicOperations (2)", &buf, "") + + buf.Truncate(0) + check(t, "TestBasicOperations (3)", &buf, "") + + n, err := buf.Write([]byte(data[0:1])) + if n != 1 { + t.Errorf("wrote 1 byte, but n == %d", n) + } + if err != nil { + t.Errorf("err should always be nil, but err == %s", err) + } + check(t, "TestBasicOperations (4)", &buf, "a") + + buf.WriteByte(data[1]) + check(t, "TestBasicOperations (5)", &buf, "ab") + + n, err = buf.Write([]byte(data[2:26])) + if n != 24 { + t.Errorf("wrote 25 bytes, but n == %d", n) + } + check(t, "TestBasicOperations (6)", &buf, string(data[0:26])) + + buf.Truncate(26) + check(t, "TestBasicOperations (7)", &buf, string(data[0:26])) + + buf.Truncate(20) + check(t, "TestBasicOperations (8)", &buf, string(data[0:20])) + + empty(t, "TestBasicOperations (9)", &buf, string(data[0:20]), make([]byte, 5)) + empty(t, "TestBasicOperations (10)", &buf, "", make([]byte, 100)) + + buf.WriteByte(data[1]) + c, err := buf.ReadByte() + if err != nil { + t.Error("ReadByte unexpected eof") + } + if c != data[1] { + t.Errorf("ReadByte wrong value c=%v", c) + } + c, err = buf.ReadByte() + if err == nil { + t.Error("ReadByte unexpected not eof") + } + } +} + +func TestLargeByteWrites(t *testing.T) { + var buf Buffer + limit := 30 + if testing.Short() { + limit = 9 + } + for i := 3; i < limit; i += 3 { + s := fillBytes(t, "TestLargeWrites (1)", &buf, "", 5, testBytes) + empty(t, "TestLargeByteWrites (2)", &buf, s, make([]byte, len(data)/i)) + } + check(t, "TestLargeByteWrites (3)", &buf, "") +} + +func TestLargeByteReads(t *testing.T) { + var buf Buffer + for i := 3; i < 30; i += 3 { + s := fillBytes(t, "TestLargeReads (1)", &buf, "", 5, testBytes[0:len(testBytes)/i]) + empty(t, "TestLargeReads (2)", &buf, s, make([]byte, len(data))) + } + check(t, "TestLargeByteReads (3)", &buf, "") +} + +func TestMixedReadsAndWrites(t *testing.T) { + var buf Buffer + s := "" + for i := 0; i < 50; i++ { + wlen := rand.Intn(len(data)) + s = fillBytes(t, "TestMixedReadsAndWrites (1)", &buf, s, 1, testBytes[0:wlen]) + rlen := rand.Intn(len(data)) + fub := make([]byte, rlen) + n, _ := buf.Read(fub) + s = s[n:] + } + empty(t, "TestMixedReadsAndWrites (2)", &buf, s, make([]byte, buf.Len())) +} + +func TestNil(t *testing.T) { + var b *Buffer + if b.String() != "<nil>" { + t.Errorf("expected <nil>; got %q", b.String()) + } +} + +func TestReadFrom(t *testing.T) { + var buf Buffer + for i := 3; i < 30; i += 3 { + s := fillBytes(t, "TestReadFrom (1)", &buf, "", 5, testBytes[0:len(testBytes)/i]) + var b Buffer + b.ReadFrom(&buf) + empty(t, "TestReadFrom (2)", &b, s, make([]byte, len(data))) + } +} + +func TestWriteTo(t *testing.T) { + var buf Buffer + for i := 3; i < 30; i += 3 { + s := fillBytes(t, "TestWriteTo (1)", &buf, "", 5, testBytes[0:len(testBytes)/i]) + var b Buffer + buf.WriteTo(&b) + empty(t, "TestWriteTo (2)", &b, s, make([]byte, len(data))) + } +} + +func TestNext(t *testing.T) { + b := []byte{0, 1, 2, 3, 4} + tmp := make([]byte, 5) + for i := 0; i <= 5; i++ { + for j := i; j <= 5; j++ { + for k := 0; k <= 6; k++ { + // 0 <= i <= j <= 5; 0 <= k <= 6 + // Check that if we start with a buffer + // of length j at offset i and ask for + // Next(k), we get the right bytes. + buf := NewBuffer(b[0:j]) + n, _ := buf.Read(tmp[0:i]) + if n != i { + t.Fatalf("Read %d returned %d", i, n) + } + bb := buf.Next(k) + want := k + if want > j-i { + want = j - i + } + if len(bb) != want { + t.Fatalf("in %d,%d: len(Next(%d)) == %d", i, j, k, len(bb)) + } + for l, v := range bb { + if v != byte(l+i) { + t.Fatalf("in %d,%d: Next(%d)[%d] = %d, want %d", i, j, k, l, v, l+i) + } + } + } + } + } +} + +var readBytesTests = []struct { + buffer string + delim byte + expected []string + err error +}{ + {"", 0, []string{""}, io.EOF}, + {"a\x00", 0, []string{"a\x00"}, nil}, + {"abbbaaaba", 'b', []string{"ab", "b", "b", "aaab"}, nil}, + {"hello\x01world", 1, []string{"hello\x01"}, nil}, + {"foo\nbar", 0, []string{"foo\nbar"}, io.EOF}, + {"alpha\nbeta\ngamma\n", '\n', []string{"alpha\n", "beta\n", "gamma\n"}, nil}, + {"alpha\nbeta\ngamma", '\n', []string{"alpha\n", "beta\n", "gamma"}, io.EOF}, +} + +func TestReadBytes(t *testing.T) { + for _, test := range readBytesTests { + buf := NewBuffer([]byte(test.buffer)) + var err error + for _, expected := range test.expected { + var bytes []byte + bytes, err = buf.ReadBytes(test.delim) + if string(bytes) != expected { + t.Errorf("expected %q, got %q", expected, bytes) + } + if err != nil { + break + } + } + if err != test.err { + t.Errorf("expected error %v, got %v", test.err, err) + } + } +} + +func TestGrow(t *testing.T) { + x := []byte{'x'} + y := []byte{'y'} + tmp := make([]byte, 72) + for _, startLen := range []int{0, 100, 1000, 10000, 100000} { + xBytes := bytes.Repeat(x, startLen) + for _, growLen := range []int{0, 100, 1000, 10000, 100000} { + buf := NewBuffer(xBytes) + // If we read, this affects buf.off, which is good to test. + readBytes, _ := buf.Read(tmp) + buf.Grow(growLen) + yBytes := bytes.Repeat(y, growLen) + // Check no allocation occurs in write, as long as we're single-threaded. + var m1, m2 runtime.MemStats + runtime.ReadMemStats(&m1) + buf.Write(yBytes) + runtime.ReadMemStats(&m2) + if runtime.GOMAXPROCS(-1) == 1 && m1.Mallocs != m2.Mallocs { + t.Errorf("allocation occurred during write") + } + // Check that buffer has correct data. + if !bytes.Equal(buf.Bytes()[0:startLen-readBytes], xBytes[readBytes:]) { + t.Errorf("bad initial data at %d %d", startLen, growLen) + } + if !bytes.Equal(buf.Bytes()[startLen-readBytes:startLen-readBytes+growLen], yBytes) { + t.Errorf("bad written data at %d %d", startLen, growLen) + } + } + } +} + +// Was a bug: used to give EOF reading empty slice at EOF. +func TestReadEmptyAtEOF(t *testing.T) { + b := new(Buffer) + slice := make([]byte, 0) + n, err := b.Read(slice) + if err != nil { + t.Errorf("read error: %v", err) + } + if n != 0 { + t.Errorf("wrong count; got %d want 0", n) + } +} + +// Tests that we occasionally compact. Issue 5154. +func TestBufferGrowth(t *testing.T) { + var b Buffer + buf := make([]byte, 1024) + b.Write(buf[0:1]) + var cap0 int + for i := 0; i < 5<<10; i++ { + b.Write(buf) + b.Read(buf) + if i == 0 { + cap0 = cap(b.buf) + } + } + cap1 := cap(b.buf) + // (*Buffer).grow allows for 2x capacity slop before sliding, + // so set our error threshold at 3x. + if cap1 > cap0*3 { + t.Errorf("buffer cap = %d; too big (grew from %d)", cap1, cap0) + } +} + +// From Issue 5154. +func BenchmarkBufferNotEmptyWriteRead(b *testing.B) { + buf := make([]byte, 1024) + for i := 0; i < b.N; i++ { + var b Buffer + b.Write(buf[0:1]) + for i := 0; i < 5<<10; i++ { + b.Write(buf) + b.Read(buf) + } + } +} + +// Check that we don't compact too often. From Issue 5154. +func BenchmarkBufferFullSmallReads(b *testing.B) { + buf := make([]byte, 1024) + for i := 0; i < b.N; i++ { + var b Buffer + b.Write(buf) + for b.Len()+20 < cap(b.buf) { + b.Write(buf[:10]) + } + for i := 0; i < 5<<10; i++ { + b.Read(buf[:1]) + b.Write(buf[:1]) + } + } +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/crc32.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/crc32.go new file mode 100644 index 000000000..631c9d610 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/crc32.go @@ -0,0 +1,30 @@ +// Copyright 2011 The LevelDB-Go Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package util + +import ( + "hash/crc32" +) + +var table = crc32.MakeTable(crc32.Castagnoli) + +// CRC is a CRC-32 checksum computed using Castagnoli's polynomial. +type CRC uint32 + +// NewCRC creates a new crc based on the given bytes. +func NewCRC(b []byte) CRC { + return CRC(0).Update(b) +} + +// Update updates the crc with the given bytes. +func (c CRC) Update(b []byte) CRC { + return CRC(crc32.Update(uint32(c), table, b)) +} + +// Value returns a masked crc. +func (c CRC) Value() uint32 { + return uint32(c>>15|c<<17) + 0xa282ead8 +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/hash.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/hash.go new file mode 100644 index 000000000..54903660f --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/hash.go @@ -0,0 +1,48 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package util + +import ( + "bytes" + "encoding/binary" +) + +// Hash return hash of the given data. +func Hash(data []byte, seed uint32) uint32 { + // Similar to murmur hash + var m uint32 = 0xc6a4a793 + var r uint32 = 24 + h := seed ^ (uint32(len(data)) * m) + + buf := bytes.NewBuffer(data) + for buf.Len() >= 4 { + var w uint32 + binary.Read(buf, binary.LittleEndian, &w) + h += w + h *= m + h ^= (h >> 16) + } + + rest := buf.Bytes() + switch len(rest) { + default: + panic("not reached") + case 3: + h += uint32(rest[2]) << 16 + fallthrough + case 2: + h += uint32(rest[1]) << 8 + fallthrough + case 1: + h += uint32(rest[0]) + h *= m + h ^= (h >> r) + case 0: + } + + return h +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/range.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/range.go new file mode 100644 index 000000000..da0583123 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/range.go @@ -0,0 +1,16 @@ +// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package util + +// Range is a key range. +type Range struct { + // Start of the key range, include in the range. + Start []byte + + // Limit of the key range, not include in the range. + Limit []byte +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/util.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/util.go new file mode 100644 index 000000000..229c7d41f --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/util.go @@ -0,0 +1,49 @@ +// Copyright (c) 2013, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package util provides utilities used throughout leveldb. +package util + +import ( + "errors" +) + +var ( + ErrNotFound = errors.New("leveldb: not found") +) + +// Releaser is the interface that wraps the basic Release method. +type Releaser interface { + // Release releases associated resources. Release should always success + // and can be called multipe times without causing error. + Release() +} + +// ReleaseSetter is the interface that wraps the basic SetReleaser method. +type ReleaseSetter interface { + // SetReleaser associates the given releaser to the resources. The + // releaser will be called once coresponding resources released. + // Calling SetReleaser with nil will clear the releaser. + SetReleaser(releaser Releaser) +} + +// BasicReleaser provides basic implementation of Releaser and ReleaseSetter. +type BasicReleaser struct { + releaser Releaser +} + +// Release implements Releaser.Release. +func (r *BasicReleaser) Release() { + if r.releaser != nil { + r.releaser.Release() + r.releaser = nil + } +} + +// SetReleaser implements ReleaseSetter.SetReleaser. +func (r *BasicReleaser) SetReleaser(releaser Releaser) { + r.releaser = releaser +} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/version.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/version.go new file mode 100644 index 000000000..4c54d6480 --- /dev/null +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/version.go @@ -0,0 +1,428 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "errors" + "sync/atomic" + "unsafe" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/util" +) + +var levelMaxSize [kNumLevels]float64 + +func init() { + // Precompute max size of each level + for level := range levelMaxSize { + res := float64(10 * 1048576) + for n := level; n > 1; n-- { + res *= 10 + } + levelMaxSize[level] = res + } +} + +type tSet struct { + level int + table *tFile +} + +type version struct { + s *session + + tables [kNumLevels]tFiles + + // Level that should be compacted next and its compaction score. + // Score < 1 means compaction is not strictly needed. These fields + // are initialized by ComputeCompaction() + cLevel int + cScore float64 + + cSeek unsafe.Pointer + + ref int + next *version +} + +func (v *version) release_NB() { + v.ref-- + if v.ref > 0 { + return + } + if v.ref < 0 { + panic("negative version ref") + } + + s := v.s + + tables := make(map[uint64]bool) + for _, tt := range v.next.tables { + for _, t := range tt { + num := t.file.Num() + tables[num] = true + } + } + + for _, tt := range v.tables { + for _, t := range tt { + num := t.file.Num() + if _, ok := tables[num]; !ok { + s.tops.remove(t) + } + } + } + + v.next.release_NB() + v.next = nil +} + +func (v *version) release() { + v.s.vmu.Lock() + v.release_NB() + v.s.vmu.Unlock() +} + +func (v *version) get(key iKey, ro *opt.ReadOptions) (value []byte, cstate bool, err error) { + s := v.s + + ukey := key.ukey() + + var tset *tSet + tseek := true + + // We can search level-by-level since entries never hop across + // levels. Therefore we are guaranteed that if we find data + // in an smaller level, later levels are irrelevant. + for level, ts := range v.tables { + if len(ts) == 0 { + continue + } + + if level == 0 { + // Level-0 files may overlap each other. Find all files that + // overlap user_key and process them in order from newest to + var tmp tFiles + for _, t := range ts { + if s.icmp.uCompare(ukey, t.min.ukey()) >= 0 && + s.icmp.uCompare(ukey, t.max.ukey()) <= 0 { + tmp = append(tmp, t) + } + } + + if len(tmp) == 0 { + continue + } + + tmp.sortByNum() + ts = tmp + } else { + i := ts.searchMax(key, s.icmp) + if i >= len(ts) || s.icmp.uCompare(ukey, ts[i].min.ukey()) < 0 { + continue + } + + ts = ts[i : i+1] + } + + var l0found bool + var l0seq uint64 + var l0type vType + var l0value []byte + for _, t := range ts { + if tseek { + if tset == nil { + tset = &tSet{level, t} + } else if tset.table.incrSeek() <= 0 { + cstate = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset)) + tseek = false + } + } + + var _rkey, rval []byte + _rkey, rval, err = s.tops.get(t, key, ro) + if err == ErrNotFound { + continue + } else if err != nil { + return + } + + rkey := iKey(_rkey) + if seq, t, ok := rkey.parseNum(); ok { + if s.icmp.uCompare(ukey, rkey.ukey()) == 0 { + if level == 0 { + if seq >= l0seq { + l0found = true + l0seq = seq + l0type = t + l0value = rval + } + } else { + switch t { + case tVal: + value = rval + case tDel: + err = ErrNotFound + default: + panic("invalid type") + } + return + } + } + } else { + err = errors.New("leveldb: internal key corrupted") + return + } + } + if level == 0 && l0found { + switch l0type { + case tVal: + value = l0value + case tDel: + err = ErrNotFound + default: + panic("invalid type") + } + return + } + } + + err = ErrNotFound + return +} + +func (v *version) getIterators(slice *util.Range, ro *opt.ReadOptions) (its []iterator.Iterator) { + s := v.s + + // Merge all level zero files together since they may overlap + for _, t := range v.tables[0] { + it := s.tops.newIterator(t, slice, ro) + its = append(its, it) + } + + strict := s.o.GetStrict(opt.StrictIterator) || ro.GetStrict(opt.StrictIterator) + for _, tt := range v.tables[1:] { + if len(tt) == 0 { + continue + } + + it := iterator.NewIndexedIterator(tt.newIndexIterator(s.tops, s.icmp, slice, ro), strict, true) + its = append(its, it) + } + + return +} + +func (v *version) newStaging() *versionStaging { + return &versionStaging{base: v} +} + +// Spawn a new version based on this version. +func (v *version) spawn(r *sessionRecord) *version { + staging := v.newStaging() + staging.commit(r) + return staging.finish() +} + +func (v *version) fillRecord(r *sessionRecord) { + for level, ts := range v.tables { + for _, t := range ts { + r.addTableFile(level, t) + } + } +} + +func (v *version) tLen(level int) int { + return len(v.tables[level]) +} + +func (v *version) offsetOf(key iKey) (n uint64, err error) { + for level, tt := range v.tables { + for _, t := range tt { + if v.s.icmp.Compare(t.max, key) <= 0 { + // Entire file is before "key", so just add the file size + n += t.size + } else if v.s.icmp.Compare(t.min, key) > 0 { + // Entire file is after "key", so ignore + if level > 0 { + // Files other than level 0 are sorted by meta->min, so + // no further files in this level will contain data for + // "key". + break + } + } else { + // "key" falls in the range for this table. Add the + // approximate offset of "key" within the table. + var nn uint64 + nn, err = v.s.tops.offsetOf(t, key) + if err != nil { + return 0, err + } + n += nn + } + } + } + + return +} + +func (v *version) pickLevel(min, max []byte) (level int) { + if !v.tables[0].isOverlaps(min, max, false, v.s.icmp) { + var r tFiles + for ; level < kMaxMemCompactLevel; level++ { + if v.tables[level+1].isOverlaps(min, max, true, v.s.icmp) { + break + } + v.tables[level+2].getOverlaps(min, max, &r, true, v.s.icmp.ucmp) + if r.size() > kMaxGrandParentOverlapBytes { + break + } + } + } + + return +} + +func (v *version) computeCompaction() { + // Precomputed best level for next compaction + var bestLevel int = -1 + var bestScore float64 = -1 + + for level, ff := range v.tables { + var score float64 + if level == 0 { + // We treat level-0 specially by bounding the number of files + // instead of number of bytes for two reasons: + // + // (1) With larger write-buffer sizes, it is nice not to do too + // many level-0 compactions. + // + // (2) The files in level-0 are merged on every read and + // therefore we wish to avoid too many files when the individual + // file size is small (perhaps because of a small write-buffer + // setting, or very high compression ratios, or lots of + // overwrites/deletions). + score = float64(len(ff)) / kL0_CompactionTrigger + } else { + score = float64(ff.size()) / levelMaxSize[level] + } + + if score > bestScore { + bestLevel = level + bestScore = score + } + } + + v.cLevel = bestLevel + v.cScore = bestScore +} + +func (v *version) needCompaction() bool { + return v.cScore >= 1 || atomic.LoadPointer(&v.cSeek) != nil +} + +type versionStaging struct { + base *version + tables [kNumLevels]struct { + added map[uint64]ntRecord + deleted map[uint64]struct{} + } +} + +func (p *versionStaging) commit(r *sessionRecord) { + btt := p.base.tables + + // deleted tables + for _, tr := range r.deletedTables { + tm := &(p.tables[tr.level]) + + bt := btt[tr.level] + if len(bt) > 0 { + if tm.deleted == nil { + tm.deleted = make(map[uint64]struct{}) + } + tm.deleted[tr.num] = struct{}{} + } + + if tm.added != nil { + delete(tm.added, tr.num) + } + } + + // new tables + for _, tr := range r.addedTables { + tm := &(p.tables[tr.level]) + + if tm.added == nil { + tm.added = make(map[uint64]ntRecord) + } + tm.added[tr.num] = tr + + if tm.deleted != nil { + delete(tm.deleted, tr.num) + } + } +} + +func (p *versionStaging) finish() *version { + s := p.base.s + btt := p.base.tables + + // build new version + nv := &version{s: s} + for level, tm := range p.tables { + bt := btt[level] + + n := len(bt) + len(tm.added) - len(tm.deleted) + if n < 0 { + n = 0 + } + nt := make(tFiles, 0, n) + + // base tables + for _, t := range bt { + if _, ok := tm.deleted[t.file.Num()]; ok { + continue + } + if _, ok := tm.added[t.file.Num()]; ok { + continue + } + nt = append(nt, t) + } + + // new tables + for _, tr := range tm.added { + nt = append(nt, tr.makeFile(s)) + } + + // sort tables + nt.sortByKey(s.icmp) + nv.tables[level] = nt + } + + // compute compaction score for new version + nv.computeCompaction() + + return nv +} + +type versionReleaser struct { + v *version + once bool +} + +func (vr *versionReleaser) Release() { + v := vr.v + v.s.vmu.Lock() + if !vr.once { + v.release_NB() + vr.once = true + } + v.s.vmu.Unlock() +} |