diff options
Diffstat (limited to 'vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go')
-rw-r--r-- | vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go | 116 |
1 files changed, 116 insertions, 0 deletions
diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go b/vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go new file mode 100644 index 000000000..bab0e9970 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go @@ -0,0 +1,116 @@ +// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package filter + +import ( + "github.com/syndtr/goleveldb/leveldb/util" +) + +func bloomHash(key []byte) uint32 { + return util.Hash(key, 0xbc9f1d34) +} + +type bloomFilter int + +// The bloom filter serializes its parameters and is backward compatible +// with respect to them. Therefor, its parameters are not added to its +// name. +func (bloomFilter) Name() string { + return "leveldb.BuiltinBloomFilter" +} + +func (f bloomFilter) Contains(filter, key []byte) bool { + nBytes := len(filter) - 1 + if nBytes < 1 { + return false + } + nBits := uint32(nBytes * 8) + + // Use the encoded k so that we can read filters generated by + // bloom filters created using different parameters. + k := filter[nBytes] + if k > 30 { + // Reserved for potentially new encodings for short bloom filters. + // Consider it a match. + return true + } + + kh := bloomHash(key) + delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits + for j := uint8(0); j < k; j++ { + bitpos := kh % nBits + if (uint32(filter[bitpos/8]) & (1 << (bitpos % 8))) == 0 { + return false + } + kh += delta + } + return true +} + +func (f bloomFilter) NewGenerator() FilterGenerator { + // Round down to reduce probing cost a little bit. + k := uint8(f * 69 / 100) // 0.69 =~ ln(2) + if k < 1 { + k = 1 + } else if k > 30 { + k = 30 + } + return &bloomFilterGenerator{ + n: int(f), + k: k, + } +} + +type bloomFilterGenerator struct { + n int + k uint8 + + keyHashes []uint32 +} + +func (g *bloomFilterGenerator) Add(key []byte) { + // Use double-hashing to generate a sequence of hash values. + // See analysis in [Kirsch,Mitzenmacher 2006]. + g.keyHashes = append(g.keyHashes, bloomHash(key)) +} + +func (g *bloomFilterGenerator) Generate(b Buffer) { + // Compute bloom filter size (in both bits and bytes) + nBits := uint32(len(g.keyHashes) * g.n) + // For small n, we can see a very high false positive rate. Fix it + // by enforcing a minimum bloom filter length. + if nBits < 64 { + nBits = 64 + } + nBytes := (nBits + 7) / 8 + nBits = nBytes * 8 + + dest := b.Alloc(int(nBytes) + 1) + dest[nBytes] = g.k + for _, kh := range g.keyHashes { + delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits + for j := uint8(0); j < g.k; j++ { + bitpos := kh % nBits + dest[bitpos/8] |= (1 << (bitpos % 8)) + kh += delta + } + } + + g.keyHashes = g.keyHashes[:0] +} + +// NewBloomFilter creates a new initialized bloom filter for given +// bitsPerKey. +// +// Since bitsPerKey is persisted individually for each bloom filter +// serialization, bloom filters are backwards compatible with respect to +// changing bitsPerKey. This means that no big performance penalty will +// be experienced when changing the parameter. See documentation for +// opt.Options.Filter for more information. +func NewBloomFilter(bitsPerKey int) Filter { + return bloomFilter(bitsPerKey) +} |