diff options
author | Péter Szilágyi <peterke@gmail.com> | 2017-08-29 19:13:11 +0800 |
---|---|---|
committer | Péter Szilágyi <peterke@gmail.com> | 2017-09-06 16:14:19 +0800 |
commit | f585f9eee8cb18423c23fe8b517b5b4cbe3b3755 (patch) | |
tree | 08c232ee58318c20f971cf8e3f5dfa09f1e2caf7 /core/bloombits | |
parent | 4ea4d2dc3473afd9d2eda6ef6b359accce1f0946 (diff) | |
download | dexon-f585f9eee8cb18423c23fe8b517b5b4cbe3b3755.tar.gz dexon-f585f9eee8cb18423c23fe8b517b5b4cbe3b3755.tar.zst dexon-f585f9eee8cb18423c23fe8b517b5b4cbe3b3755.zip |
core, eth: clean up bloom filtering, add some tests
Diffstat (limited to 'core/bloombits')
-rw-r--r-- | core/bloombits/doc.go | 18 | ||||
-rw-r--r-- | core/bloombits/fetcher_test.go | 101 | ||||
-rw-r--r-- | core/bloombits/generator.go | 84 | ||||
-rw-r--r-- | core/bloombits/generator_test.go | 60 | ||||
-rw-r--r-- | core/bloombits/matcher.go | 878 | ||||
-rw-r--r-- | core/bloombits/matcher_test.go | 283 | ||||
-rw-r--r-- | core/bloombits/scheduler.go | 181 | ||||
-rw-r--r-- | core/bloombits/scheduler_test.go | 105 | ||||
-rw-r--r-- | core/bloombits/utils.go | 63 |
9 files changed, 1086 insertions, 687 deletions
diff --git a/core/bloombits/doc.go b/core/bloombits/doc.go new file mode 100644 index 000000000..3d159e74f --- /dev/null +++ b/core/bloombits/doc.go @@ -0,0 +1,18 @@ +// Copyright 2017 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +// Package bloombits implements bloom filtering on batches of data. +package bloombits diff --git a/core/bloombits/fetcher_test.go b/core/bloombits/fetcher_test.go deleted file mode 100644 index 9c229cf8d..000000000 --- a/core/bloombits/fetcher_test.go +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2017 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. -package bloombits - -import ( - "bytes" - "encoding/binary" - "math/rand" - "sync" - "sync/atomic" - "testing" - "time" -) - -const testFetcherReqCount = 5000 - -func fetcherTestVector(b uint, s uint64) []byte { - r := make([]byte, 10) - binary.BigEndian.PutUint16(r[0:2], uint16(b)) - binary.BigEndian.PutUint64(r[2:10], s) - return r -} - -func TestFetcher(t *testing.T) { - testFetcher(t, 1) -} - -func TestFetcherMultipleReaders(t *testing.T) { - testFetcher(t, 10) -} - -func testFetcher(t *testing.T, cnt int) { - f := &fetcher{ - requestMap: make(map[uint64]fetchRequest), - } - distCh := make(chan distRequest, channelCap) - stop := make(chan struct{}) - var reqCount uint32 - - for i := 0; i < 10; i++ { - go func() { - for { - req, ok := <-distCh - if !ok { - return - } - time.Sleep(time.Duration(rand.Intn(100000))) - atomic.AddUint32(&reqCount, 1) - f.deliver([]uint64{req.sectionIndex}, [][]byte{fetcherTestVector(req.bloomIndex, req.sectionIndex)}) - } - }() - } - - var wg, wg2 sync.WaitGroup - for cc := 0; cc < cnt; cc++ { - wg.Add(1) - in := make(chan uint64, channelCap) - out := f.fetch(in, distCh, stop, &wg2) - - time.Sleep(time.Millisecond * 10 * time.Duration(cc)) - go func() { - for i := uint64(0); i < testFetcherReqCount; i++ { - in <- i - } - }() - - go func() { - for i := uint64(0); i < testFetcherReqCount; i++ { - bv := <-out - if !bytes.Equal(bv, fetcherTestVector(0, i)) { - if len(bv) != 10 { - t.Errorf("Vector #%d length is %d, expected 10", i, len(bv)) - } else { - j := binary.BigEndian.Uint64(bv[2:10]) - t.Errorf("Expected vector #%d, fetched #%d", i, j) - } - } - } - wg.Done() - }() - } - - wg.Wait() - close(stop) - if reqCount != testFetcherReqCount { - t.Errorf("Request count mismatch: expected %v, got %v", testFetcherReqCount, reqCount) - } -} diff --git a/core/bloombits/generator.go b/core/bloombits/generator.go new file mode 100644 index 000000000..04a7f5146 --- /dev/null +++ b/core/bloombits/generator.go @@ -0,0 +1,84 @@ +// Copyright 2017 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package bloombits + +import ( + "errors" + + "github.com/ethereum/go-ethereum/core/types" +) + +// errSectionOutOfBounds is returned if the user tried to add more bloom filters +// to the batch than available space, or if tries to retrieve above the capacity, +var errSectionOutOfBounds = errors.New("section out of bounds") + +// Generator takes a number of bloom filters and generates the rotated bloom bits +// to be used for batched filtering. +type Generator struct { + blooms [types.BloomBitLength][]byte // Rotated blooms for per-bit matching + sections uint // Number of sections to batch together + nextBit uint // Next bit to set when adding a bloom +} + +// NewGenerator creates a rotated bloom generator that can iteratively fill a +// batched bloom filter's bits. +func NewGenerator(sections uint) (*Generator, error) { + if sections%8 != 0 { + return nil, errors.New("section count not multiple of 8") + } + b := &Generator{sections: sections} + for i := 0; i < types.BloomBitLength; i++ { + b.blooms[i] = make([]byte, sections/8) + } + return b, nil +} + +// AddBloom takes a single bloom filter and sets the corresponding bit column +// in memory accordingly. +func (b *Generator) AddBloom(bloom types.Bloom) error { + // Make sure we're not adding more bloom filters than our capacity + if b.nextBit >= b.sections { + return errSectionOutOfBounds + } + // Rotate the bloom and insert into our collection + byteMask := b.nextBit / 8 + bitMask := byte(1) << byte(7-b.nextBit%8) + + for i := 0; i < types.BloomBitLength; i++ { + bloomByteMask := types.BloomByteLength - 1 - i/8 + bloomBitMask := byte(1) << byte(i%8) + + if (bloom[bloomByteMask] & bloomBitMask) != 0 { + b.blooms[i][byteMask] |= bitMask + } + } + b.nextBit++ + + return nil +} + +// Bitset returns the bit vector belonging to the given bit index after all +// blooms have been added. +func (b *Generator) Bitset(idx uint) ([]byte, error) { + if b.nextBit != b.sections { + return nil, errors.New("bloom not fully generated yet") + } + if idx >= b.sections { + return nil, errSectionOutOfBounds + } + return b.blooms[idx], nil +} diff --git a/core/bloombits/generator_test.go b/core/bloombits/generator_test.go new file mode 100644 index 000000000..f4aa9551c --- /dev/null +++ b/core/bloombits/generator_test.go @@ -0,0 +1,60 @@ +// Copyright 2017 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package bloombits + +import ( + "bytes" + "math/rand" + "testing" + + "github.com/ethereum/go-ethereum/core/types" +) + +// Tests that batched bloom bits are correctly rotated from the input bloom +// filters. +func TestGenerator(t *testing.T) { + // Generate the input and the rotated output + var input, output [types.BloomBitLength][types.BloomByteLength]byte + + for i := 0; i < types.BloomBitLength; i++ { + for j := 0; j < types.BloomBitLength; j++ { + bit := byte(rand.Int() % 2) + + input[i][j/8] |= bit << byte(7-j%8) + output[types.BloomBitLength-1-j][i/8] |= bit << byte(7-i%8) + } + } + // Crunch the input through the generator and verify the result + gen, err := NewGenerator(types.BloomBitLength) + if err != nil { + t.Fatalf("failed to create bloombit generator: %v", err) + } + for i, bloom := range input { + if err := gen.AddBloom(bloom); err != nil { + t.Fatalf("bloom %d: failed to add: %v", i, err) + } + } + for i, want := range output { + have, err := gen.Bitset(uint(i)) + if err != nil { + t.Fatalf("output %d: failed to retrieve bits: %v", i, err) + } + if !bytes.Equal(have, want[:]) { + t.Errorf("output %d: bit vector mismatch have %x, want %x", i, have, want) + } + } +} diff --git a/core/bloombits/matcher.go b/core/bloombits/matcher.go index 5a7df6b1c..e365fd6d0 100644 --- a/core/bloombits/matcher.go +++ b/core/bloombits/matcher.go @@ -13,327 +13,350 @@ // // You should have received a copy of the GNU Lesser General Public License // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + package bloombits import ( + "errors" + "math" + "sort" "sync" + "sync/atomic" + "time" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/bitutil" - "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" ) -const channelCap = 100 +// bloomIndexes represents the bit indexes inside the bloom filter that belong +// to some key. +type bloomIndexes [3]uint -// fetcher handles bit vector retrieval pipelines for a single bit index -type fetcher struct { - bloomIndex uint - requestMap map[uint64]fetchRequest - requestLock sync.RWMutex -} +// calcBloomIndexes returns the bloom filter bit indexes belonging to the given key. +func calcBloomIndexes(b []byte) bloomIndexes { + b = crypto.Keccak256(b) -// fetchRequest represents the state of a bit vector requested from a fetcher. When a distRequest has been sent to the distributor but -// the data has not been delivered yet, queued is true. When delivered, it is stored in the data field and the delivered channel is closed. -type fetchRequest struct { - data []byte - queued bool - delivered chan struct{} + var idxs bloomIndexes + for i := 0; i < len(idxs); i++ { + idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1]) + } + return idxs } -// distRequest is sent by the fetcher to the distributor which groups and prioritizes these requests. -type distRequest struct { - bloomIndex uint - sectionIndex uint64 +// partialMatches with a non-nil vector represents a section in which some sub- +// matchers have already found potential matches. Subsequent sub-matchers will +// binary AND their matches with this vector. If vector is nil, it represents a +// section to be processed by the first sub-matcher. +type partialMatches struct { + section uint64 + bitset []byte } -// fetch creates a retrieval pipeline, receiving section indexes from sectionCh and returning the results -// in the same order through the returned channel. Multiple fetch instances of the same fetcher are allowed -// to run in parallel, in case the same bit index appears multiple times in the filter structure. Each section -// is requested only once, requests are sent to the request distributor (part of Matcher) through distCh. -func (f *fetcher) fetch(sectionCh chan uint64, distCh chan distRequest, stop chan struct{}, wg *sync.WaitGroup) chan []byte { - dataCh := make(chan []byte, channelCap) - returnCh := make(chan uint64, channelCap) - wg.Add(2) - - go func() { - defer wg.Done() - defer close(returnCh) - - for { - select { - case <-stop: - return - case idx, ok := <-sectionCh: - if !ok { - return - } - - req := false - f.requestLock.Lock() - r := f.requestMap[idx] - if r.data == nil { - req = !r.queued - r.queued = true - if r.delivered == nil { - r.delivered = make(chan struct{}) - } - f.requestMap[idx] = r - } - f.requestLock.Unlock() - if req { - distCh <- distRequest{bloomIndex: f.bloomIndex, sectionIndex: idx} // success is guaranteed, distibuteRequests shuts down after fetch - } - select { - case <-stop: - return - case returnCh <- idx: - } - } - } - }() - - go func() { - defer wg.Done() - defer close(dataCh) - - for { - select { - case <-stop: - return - case idx, ok := <-returnCh: - if !ok { - return - } - - f.requestLock.RLock() - r := f.requestMap[idx] - f.requestLock.RUnlock() - - if r.data == nil { - select { - case <-stop: - return - case <-r.delivered: - f.requestLock.RLock() - r = f.requestMap[idx] - f.requestLock.RUnlock() - } - } - select { - case <-stop: - return - case dataCh <- r.data: - } - } - } - }() - - return dataCh +// Retrieval represents a request for retrieval task assignments for a given +// bit with the given number of fetch elements, or a response for such a request. +// It can also have the actual results set to be used as a delivery data struct. +type Retrieval struct { + Bit uint + Sections []uint64 + Bitsets [][]byte } -// deliver is called by the request distributor when a reply to a request has -// arrived -func (f *fetcher) deliver(sectionIdxList []uint64, data [][]byte) { - f.requestLock.Lock() - defer f.requestLock.Unlock() +// Matcher is a pipelined system of schedulers and logic matchers which perform +// binary AND/OR operations on the bit-streams, creating a stream of potential +// blocks to inspect for data content. +type Matcher struct { + sectionSize uint64 // Size of the data batches to filter on - for i, sectionIdx := range sectionIdxList { - r := f.requestMap[sectionIdx] - if r.data != nil { - panic("BloomBits section data delivered twice") - } - r.data = data[i] - close(r.delivered) - f.requestMap[sectionIdx] = r - } -} + addresses []bloomIndexes // Addresses the system is filtering for + topics [][]bloomIndexes // Topics the system is filtering for + schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits -// Matcher is a pipelined structure of fetchers and logic matchers which perform -// binary AND/OR operations on the bitstreams, finally creating a stream of potential matches. -type Matcher struct { - addresses []types.BloomIndexList - topics [][]types.BloomIndexList - fetchers map[uint]*fetcher - sectionSize uint64 - - distCh chan distRequest - reqs map[uint][]uint64 - freeQueues map[uint]struct{} - allocQueue []chan uint - running bool - stop chan struct{} - lock sync.Mutex - wg, distWg sync.WaitGroup + retrievers chan chan uint // Retriever processes waiting for bit allocations + counters chan chan uint // Retriever processes waiting for task count reports + retrievals chan chan *Retrieval // Retriever processes waiting for task allocations + deliveries chan *Retrieval // Retriever processes waiting for task response deliveries + + running uint32 // Atomic flag whether a session is live or not } -// NewMatcher creates a new Matcher instance +// NewMatcher creates a new pipeline for retrieving bloom bit streams and doing +// address and topic filtering on them. func NewMatcher(sectionSize uint64, addresses []common.Address, topics [][]common.Hash) *Matcher { m := &Matcher{ - fetchers: make(map[uint]*fetcher), - reqs: make(map[uint][]uint64), - freeQueues: make(map[uint]struct{}), - distCh: make(chan distRequest, channelCap), sectionSize: sectionSize, + schedulers: make(map[uint]*scheduler), + retrievers: make(chan chan uint), + counters: make(chan chan uint), + retrievals: make(chan chan *Retrieval), + deliveries: make(chan *Retrieval), } m.setAddresses(addresses) m.setTopics(topics) return m } -// setAddresses matches only logs that are generated from addresses that are included -// in the given addresses. +// setAddresses configures the matcher to only return logs that are generated +// from addresses that are included in the given list. func (m *Matcher) setAddresses(addresses []common.Address) { - m.addresses = make([]types.BloomIndexList, len(addresses)) + // Calculate the bloom bit indexes for the addresses we're interested in + m.addresses = make([]bloomIndexes, len(addresses)) for i, address := range addresses { - m.addresses[i] = types.BloomIndexes(address.Bytes()) + m.addresses[i] = calcBloomIndexes(address.Bytes()) } - + // For every bit, create a scheduler to load/download the bit vectors for _, bloomIndexList := range m.addresses { for _, bloomIndex := range bloomIndexList { - m.newFetcher(bloomIndex) + m.addScheduler(bloomIndex) } } } -// setTopics matches only logs that have topics matching the given topics. -func (m *Matcher) setTopics(topics [][]common.Hash) { +// setTopics configures the matcher to only return logs that have topics matching +// the given list. +func (m *Matcher) setTopics(topicsList [][]common.Hash) { + // Calculate the bloom bit indexes for the topics we're interested in m.topics = nil -loop: - for _, topicList := range topics { - t := make([]types.BloomIndexList, len(topicList)) - for i, topic := range topicList { - if (topic == common.Hash{}) { - continue loop - } - t[i] = types.BloomIndexes(topic.Bytes()) + + for _, topics := range topicsList { + bloomBits := make([]bloomIndexes, len(topics)) + for i, topic := range topics { + bloomBits[i] = calcBloomIndexes(topic.Bytes()) } - m.topics = append(m.topics, t) + m.topics = append(m.topics, bloomBits) } - + // For every bit, create a scheduler to load/download the bit vectors for _, bloomIndexLists := range m.topics { for _, bloomIndexList := range bloomIndexLists { for _, bloomIndex := range bloomIndexList { - m.newFetcher(bloomIndex) + m.addScheduler(bloomIndex) } } } } -// match creates a daisy-chain of sub-matchers, one for the address set and one for each topic set, each -// sub-matcher receiving a section only if the previous ones have all found a potential match in one of -// the blocks of the section, then binary AND-ing its own matches and forwaring the result to the next one -func (m *Matcher) match(processCh chan partialMatches) chan partialMatches { - indexLists := m.topics - if len(m.addresses) > 0 { - indexLists = append([][]types.BloomIndexList{m.addresses}, indexLists...) +// addScheduler adds a bit stream retrieval scheduler for the given bit index if +// it has not existed before. If the bit is already selected for filtering, the +// existing scheduler can be used. +func (m *Matcher) addScheduler(idx uint) { + if _, ok := m.schedulers[idx]; ok { + return } - m.distributeRequests() + m.schedulers[idx] = newScheduler(idx) +} - for _, subIndexList := range indexLists { - processCh = m.subMatch(processCh, subIndexList) +// Start starts the matching process and returns a stream of bloom matches in +// a given range of blocks. If there are no more matches in the range, the result +// channel is closed. +func (m *Matcher) Start(begin, end uint64, results chan uint64) (*MatcherSession, error) { + // Make sure we're not creating concurrent sessions + if atomic.SwapUint32(&m.running, 1) == 1 { + return nil, errors.New("matcher already running") } - return processCh -} + defer atomic.StoreUint32(&m.running, 0) -// partialMatches with a non-nil vector represents a section in which some sub-matchers have already -// found potential matches. Subsequent sub-matchers will binary AND their matches with this vector. -// If vector is nil, it represents a section to be processed by the first sub-matcher. -type partialMatches struct { - sectionIndex uint64 - vector []byte + // Initiate a new matching round + session := &MatcherSession{ + matcher: m, + quit: make(chan struct{}), + kill: make(chan struct{}), + } + for _, scheduler := range m.schedulers { + scheduler.reset() + } + sink := m.run(begin, end, cap(results), session) + + // Read the output from the result sink and deliver to the user + session.pend.Add(1) + go func() { + defer session.pend.Done() + defer close(results) + + for { + select { + case <-session.quit: + return + + case res, ok := <-sink: + // New match result found + if !ok { + return + } + // Calculate the first and last blocks of the section + sectionStart := res.section * m.sectionSize + + first := sectionStart + if begin > first { + first = begin + } + last := sectionStart + m.sectionSize - 1 + if end < last { + last = end + } + // Iterate over all the blocks in the section and return the matching ones + for i := first; i <= last; i++ { + // If the bitset is nil, we're a special match-all cornercase + if res.bitset == nil { + select { + case <-session.quit: + return + case results <- i: + } + continue + } + // Skip the entire byte if no matches are found inside + next := res.bitset[(i-sectionStart)/8] + if next == 0 { + i += 7 + continue + } + // Some bit it set, do the actual submatching + if bit := 7 - i%8; next&(1<<bit) != 0 { + select { + case <-session.quit: + return + case results <- i: + } + } + } + } + } + }() + return session, nil } -// newFetcher adds a fetcher for the given bit index if it has not existed before -func (m *Matcher) newFetcher(idx uint) { - if _, ok := m.fetchers[idx]; ok { - return +// run creates a daisy-chain of sub-matchers, one for the address set and one +// for each topic set, each sub-matcher receiving a section only if the previous +// ones have all found a potential match in one of the blocks of the section, +// then binary AND-ing its own matches and forwaring the result to the next one. +// +// The method starts feeding the section indexes into the first sub-matcher on a +// new goroutine and returns a sink channel receiving the results. +func (m *Matcher) run(begin, end uint64, buffer int, session *MatcherSession) chan *partialMatches { + // Create the source channel and feed section indexes into + source := make(chan *partialMatches, buffer) + + session.pend.Add(1) + go func() { + defer session.pend.Done() + defer close(source) + + for i := begin / m.sectionSize; i <= end/m.sectionSize; i++ { + select { + case <-session.quit: + return + case source <- &partialMatches{i, nil}: + } + } + }() + // Assemble the daisy-chained filtering pipeline + blooms := m.topics + if len(m.addresses) > 0 { + blooms = append([][]bloomIndexes{m.addresses}, blooms...) } - f := &fetcher{ - bloomIndex: idx, - requestMap: make(map[uint64]fetchRequest), + next := source + dist := make(chan *request, buffer) + + for _, bloom := range blooms { + next = m.subMatch(next, dist, bloom, session) } - m.fetchers[idx] = f + // Start the request distribution + session.pend.Add(1) + go m.distributor(dist, session) + + return next } // subMatch creates a sub-matcher that filters for a set of addresses or topics, binary OR-s those matches, then -// binary AND-s the result to the daisy-chain input (processCh) and forwards it to the daisy-chain output. +// binary AND-s the result to the daisy-chain input (source) and forwards it to the daisy-chain output. // The matches of each address/topic are calculated by fetching the given sections of the three bloom bit indexes belonging to // that address/topic, and binary AND-ing those vectors together. -func (m *Matcher) subMatch(processCh chan partialMatches, bloomIndexLists []types.BloomIndexList) chan partialMatches { - // set up fetchers - fetchIndexChannels := make([][3]chan uint64, len(bloomIndexLists)) - fetchDataChannels := make([][3]chan []byte, len(bloomIndexLists)) - for i, bloomIndexList := range bloomIndexLists { - for j, bloomIndex := range bloomIndexList { - fetchIndexChannels[i][j] = make(chan uint64, channelCap) - fetchDataChannels[i][j] = m.fetchers[bloomIndex].fetch(fetchIndexChannels[i][j], m.distCh, m.stop, &m.wg) +func (m *Matcher) subMatch(source chan *partialMatches, dist chan *request, bloom []bloomIndexes, session *MatcherSession) chan *partialMatches { + // Start the concurrent schedulers for each bit required by the bloom filter + sectionSources := make([][3]chan uint64, len(bloom)) + sectionSinks := make([][3]chan []byte, len(bloom)) + for i, bits := range bloom { + for j, bit := range bits { + sectionSources[i][j] = make(chan uint64, cap(source)) + sectionSinks[i][j] = make(chan []byte, cap(source)) + + m.schedulers[bit].run(sectionSources[i][j], dist, sectionSinks[i][j], session.quit, &session.pend) } } - fetchedCh := make(chan partialMatches, channelCap) // entries from processCh are forwarded here after fetches have been initiated - resultsCh := make(chan partialMatches, channelCap) + process := make(chan *partialMatches, cap(source)) // entries from source are forwarded here after fetches have been initiated + results := make(chan *partialMatches, cap(source)) - m.wg.Add(2) - // goroutine for starting retrievals + session.pend.Add(2) go func() { - defer m.wg.Done() - + // Tear down the goroutine and terminate all source channels + defer session.pend.Done() + defer close(process) + + defer func() { + for _, bloomSources := range sectionSources { + for _, bitSource := range bloomSources { + close(bitSource) + } + } + }() + // Read sections from the source channel and multiplex into all bit-schedulers for { select { - case <-m.stop: + case <-session.quit: return - case s, ok := <-processCh: + + case subres, ok := <-source: + // New subresult from previous link if !ok { - close(fetchedCh) - for _, fetchIndexChs := range fetchIndexChannels { - for _, fetchIndexCh := range fetchIndexChs { - close(fetchIndexCh) - } - } return } - - for _, fetchIndexChs := range fetchIndexChannels { - for _, fetchIndexCh := range fetchIndexChs { + // Multiplex the section index to all bit-schedulers + for _, bloomSources := range sectionSources { + for _, bitSource := range bloomSources { select { - case <-m.stop: + case <-session.quit: return - case fetchIndexCh <- s.sectionIndex: + case bitSource <- subres.section: } } } + // Notify the processor that this section will become available select { - case <-m.stop: + case <-session.quit: return - case fetchedCh <- s: + case process <- subres: } } } }() - // goroutine for processing retrieved data go func() { - defer m.wg.Done() + // Tear down the goroutine and terminate the final sink channel + defer session.pend.Done() + defer close(results) + // Read the source notifications and collect the delivered results for { select { - case <-m.stop: + case <-session.quit: return - case s, ok := <-fetchedCh: + + case subres, ok := <-process: + // Notified of a section being retrieved if !ok { - close(resultsCh) return } - + // Gather all the sub-results and merge them together var orVector []byte - for _, fetchDataChs := range fetchDataChannels { + for _, bloomSinks := range sectionSinks { var andVector []byte - for _, fetchDataCh := range fetchDataChs { + for _, bitSink := range bloomSinks { var data []byte select { - case <-m.stop: + case <-session.quit: return - case data = <-fetchDataCh: + case data = <-bitSink: } if andVector == nil { andVector = make([]byte, int(m.sectionSize/8)) @@ -352,228 +375,277 @@ func (m *Matcher) subMatch(processCh chan partialMatches, bloomIndexLists []type if orVector == nil { orVector = make([]byte, int(m.sectionSize/8)) } - if s.vector != nil { - bitutil.ANDBytes(orVector, orVector, s.vector) + if subres.bitset != nil { + bitutil.ANDBytes(orVector, orVector, subres.bitset) } if bitutil.TestBytes(orVector) { select { - case <-m.stop: + case <-session.quit: return - case resultsCh <- partialMatches{s.sectionIndex, orVector}: + case results <- &partialMatches{subres.section, orVector}: } } } } }() - - return resultsCh + return results } -// Start starts the matching process and returns a stream of bloom matches in -// a given range of blocks. -// It returns a results channel immediately and stops if Stop is called or there -// are no more matches in the range (in which case the results channel is closed). -// Start/Stop can be called multiple times for different ranges, in which case already -// delivered bit vectors are not requested again. -func (m *Matcher) Start(begin, end uint64) chan uint64 { - m.stop = make(chan struct{}) - processCh := make(chan partialMatches, channelCap) - resultsCh := make(chan uint64, channelCap) - - res := m.match(processCh) - - startSection := begin / m.sectionSize - endSection := end / m.sectionSize - - m.wg.Add(2) - go func() { - defer m.wg.Done() - defer close(processCh) +// distributor receives requests from the schedulers and queues them into a set +// of pending requests, which are assigned to retrievers wanting to fulfil them. +func (m *Matcher) distributor(dist chan *request, session *MatcherSession) { + defer session.pend.Done() + + var ( + requests = make(map[uint][]uint64) // Per-bit list of section requests, ordered by section number + unallocs = make(map[uint]struct{}) // Bits with pending requests but not allocated to any retriever + retrievers chan chan uint // Waiting retrievers (toggled to nil if unallocs is empty) + ) + var ( + allocs int // Number of active allocations to handle graceful shutdown requests + shutdown = session.quit // Shutdown request channel, will gracefully wait for pending requests + ) + + // assign is a helper method fo try to assign a pending bit an an actively + // listening servicer, or schedule it up for later when one arrives. + assign := func(bit uint) { + select { + case fetcher := <-m.retrievers: + allocs++ + fetcher <- bit + default: + // No retrievers active, start listening for new ones + retrievers = m.retrievers + unallocs[bit] = struct{}{} + } + } - for i := startSection; i <= endSection; i++ { - select { - case processCh <- partialMatches{i, nil}: - case <-m.stop: + for { + select { + case <-shutdown: + // Graceful shutdown requested, wait until all pending requests are honoured + if allocs == 0 { return } - } - }() + shutdown = nil - go func() { - defer m.wg.Done() - defer close(resultsCh) + case <-session.kill: + // Pending requests not honoured in time, hard terminate + return - for { - select { - case r, ok := <-res: - if !ok { - return - } - sectionStart := r.sectionIndex * m.sectionSize - s := sectionStart - if begin > s { - s = begin - } - e := sectionStart + m.sectionSize - 1 - if end < e { - e = end - } - for i := s; i <= e; i++ { - b := r.vector[(i-sectionStart)/8] - bit := 7 - i%8 - if b != 0 { - if b&(1<<bit) != 0 { - select { - case <-m.stop: - return - case resultsCh <- i: - } - } - } else { - i += bit - } - } + case req := <-dist: + // New retrieval request arrived to be distributed to some fetcher process + queue := requests[req.bit] + index := sort.Search(len(queue), func(i int) bool { return queue[i] >= req.section }) + requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...) - case <-m.stop: - return + // If it's a new bit and we have waiting fetchers, allocate to them + if len(queue) == 0 { + assign(req.bit) } - } - }() - - return resultsCh -} - -// Stop stops the matching process -func (m *Matcher) Stop() { - close(m.stop) - m.distWg.Wait() -} -// distributeRequests receives requests from the fetchers and either queues them -// or immediately forwards them to one of the waiting NextRequest functions. -// Requests with a lower section idx are always prioritized. -func (m *Matcher) distributeRequests() { - m.distWg.Add(1) - stopDist := make(chan struct{}) - go func() { - <-m.stop - m.wg.Wait() - close(stopDist) - }() + case fetcher := <-retrievers: + // New retriever arrived, find the lowest section-ed bit to assign + bit, best := uint(0), uint64(math.MaxUint64) + for idx := range unallocs { + if requests[idx][0] < best { + bit, best = idx, requests[idx][0] + } + } + // Stop tracking this bit (and alloc notifications if no more work is available) + delete(unallocs, bit) + if len(unallocs) == 0 { + retrievers = nil + } + allocs++ + fetcher <- bit + + case fetcher := <-m.counters: + // New task count request arrives, return number of items + fetcher <- uint(len(requests[<-fetcher])) + + case fetcher := <-m.retrievals: + // New fetcher waiting for tasks to retrieve, assign + task := <-fetcher + if want := len(task.Sections); want >= len(requests[task.Bit]) { + task.Sections = requests[task.Bit] + delete(requests, task.Bit) + } else { + task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...) + requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...) + } + fetcher <- task - m.running = true + // If anything was left unallocated, try to assign to someone else + if len(requests[task.Bit]) > 0 { + assign(task.Bit) + } - go func() { - for { - select { - case r := <-m.distCh: - m.lock.Lock() - queue := m.reqs[r.bloomIndex] - i := 0 - for i < len(queue) && r.sectionIndex > queue[i] { - i++ + case result := <-m.deliveries: + // New retrieval task response from fetcher, split out missing sections and + // deliver complete ones + var ( + sections = make([]uint64, 0, len(result.Sections)) + bitsets = make([][]byte, 0, len(result.Bitsets)) + missing = make([]uint64, 0, len(result.Sections)) + ) + for i, bitset := range result.Bitsets { + if len(bitset) == 0 { + missing = append(missing, result.Sections[i]) + continue } - queue = append(queue, 0) - copy(queue[i+1:], queue[i:len(queue)-1]) - queue[i] = r.sectionIndex - m.reqs[r.bloomIndex] = queue - if len(queue) == 1 { - m.freeQueue(r.bloomIndex) + sections = append(sections, result.Sections[i]) + bitsets = append(bitsets, bitset) + } + m.schedulers[result.Bit].deliver(sections, bitsets) + allocs-- + + // Reschedule missing sections and allocate bit if newly available + if len(missing) > 0 { + queue := requests[result.Bit] + for _, section := range missing { + index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section }) + queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...) } - m.lock.Unlock() - case <-stopDist: - m.lock.Lock() - for _, ch := range m.allocQueue { - close(ch) + requests[result.Bit] = queue + + if len(queue) == len(missing) { + assign(result.Bit) } - m.allocQueue = nil - m.running = false - m.lock.Unlock() - m.distWg.Done() + } + // If we're in the process of shutting down, terminate + if allocs == 0 && shutdown == nil { return } } - }() + } } -// freeQueue marks a queue as free if there are no AllocSectionQueue functions -// waiting for allocation. If there is someone waiting, the queue is immediately -// allocated. -func (m *Matcher) freeQueue(bloomIndex uint) { - if len(m.allocQueue) > 0 { - m.allocQueue[0] <- bloomIndex - m.allocQueue = m.allocQueue[1:] - } else { - m.freeQueues[bloomIndex] = struct{}{} +// MatcherSession is returned by a started matcher to be used as a terminator +// for the actively running matching operation. +type MatcherSession struct { + matcher *Matcher + + quit chan struct{} // Quit channel to request pipeline termination + kill chan struct{} // Term channel to signal non-graceful forced shutdown + pend sync.WaitGroup +} + +// Close stops the matching process and waits for all subprocesses to terminate +// before returning. The timeout may be used for graceful shutdown, allowing the +// currently running retrievals to complete before this time. +func (s *MatcherSession) Close(timeout time.Duration) { + // Bail out if the matcher is not running + select { + case <-s.quit: + return + default: } + // Signal termination and wait for all goroutines to tear down + close(s.quit) + time.AfterFunc(timeout, func() { close(s.kill) }) + s.pend.Wait() } -// AllocSectionQueue allocates a queue of requested section indexes belonging to the same -// bloom bit index for a client process that can either immediately fetch the contents -// of the queue or wait a little while for more section indexes to be requested. -func (m *Matcher) AllocSectionQueue() (uint, bool) { - m.lock.Lock() - if !m.running { - m.lock.Unlock() +// AllocateRetrieval assigns a bloom bit index to a client process that can either +// immediately reuest and fetch the section contents assigned to this bit or wait +// a little while for more sections to be requested. +func (s *MatcherSession) AllocateRetrieval() (uint, bool) { + fetcher := make(chan uint) + + select { + case <-s.quit: return 0, false + case s.matcher.retrievers <- fetcher: + bit, ok := <-fetcher + return bit, ok } +} - var allocCh chan uint - if len(m.freeQueues) > 0 { - var ( - found bool - bestSection uint64 - bestIndex uint - ) - for bloomIndex, _ := range m.freeQueues { - if !found || m.reqs[bloomIndex][0] < bestSection { - found = true - bestIndex = bloomIndex - bestSection = m.reqs[bloomIndex][0] - } - } - delete(m.freeQueues, bestIndex) - m.lock.Unlock() - return bestIndex, true - } else { - allocCh = make(chan uint) - m.allocQueue = append(m.allocQueue, allocCh) +// PendingSections returns the number of pending section retrievals belonging to +// the given bloom bit index. +func (s *MatcherSession) PendingSections(bit uint) int { + fetcher := make(chan uint) + + select { + case <-s.quit: + return 0 + case s.matcher.counters <- fetcher: + fetcher <- bit + return int(<-fetcher) } - m.lock.Unlock() - - bloomIndex, ok := <-allocCh - return bloomIndex, ok } -// SectionCount returns the length of the section index queue belonging to the given bloom bit index -func (m *Matcher) SectionCount(bloomIndex uint) int { - m.lock.Lock() - defer m.lock.Unlock() - - return len(m.reqs[bloomIndex]) +// AllocateSections assigns all or part of an already allocated bit-task queue +// to the requesting process. +func (s *MatcherSession) AllocateSections(bit uint, count int) []uint64 { + fetcher := make(chan *Retrieval) + + select { + case <-s.quit: + return nil + case s.matcher.retrievals <- fetcher: + task := &Retrieval{ + Bit: bit, + Sections: make([]uint64, count), + } + fetcher <- task + return (<-fetcher).Sections + } } -// FetchSections fetches all or part of an already allocated queue and deallocates it -func (m *Matcher) FetchSections(bloomIndex uint, maxCount int) []uint64 { - m.lock.Lock() - defer m.lock.Unlock() - - queue := m.reqs[bloomIndex] - if maxCount < len(queue) { - // return only part of the existing queue, mark the rest as free - m.reqs[bloomIndex] = queue[maxCount:] - m.freeQueue(bloomIndex) - return queue[:maxCount] - } else { - // return the entire queue - delete(m.reqs, bloomIndex) - return queue +// DeliverSections delivers a batch of section bit-vectors for a specific bloom +// bit index to be injected into the processing pipeline. +func (s *MatcherSession) DeliverSections(bit uint, sections []uint64, bitsets [][]byte) { + select { + case <-s.kill: + return + case s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets}: } } -// Deliver delivers a bit vector to the appropriate fetcher. -// It is possible to deliver data even after Stop has been called. Once a vector has been -// requested, the matcher will keep waiting for delivery. -func (m *Matcher) Deliver(bloomIndex uint, sectionIdxList []uint64, data [][]byte) { - m.fetchers[bloomIndex].deliver(sectionIdxList, data) +// Multiplex polls the matcher session for rerieval tasks and multiplexes it into +// the reuested retrieval queue to be serviced together with other sessions. +// +// This method will block for the lifetime of the session. Even after termination +// of the session, any request in-flight need to be responded to! Empty responses +// are fine though in that case. +func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) { + for { + // Allocate a new bloom bit index to retrieve data for, stopping when done + bit, ok := s.AllocateRetrieval() + if !ok { + return + } + // Bit allocated, throttle a bit if we're below our batch limit + if s.PendingSections(bit) < batch { + select { + case <-s.quit: + // Session terminating, we can't meaningfully service, abort + s.AllocateSections(bit, 0) + s.DeliverSections(bit, []uint64{}, [][]byte{}) + return + + case <-time.After(wait): + // Throttling up, fetch whatever's available + } + } + // Allocate as much as we can handle and request servicing + sections := s.AllocateSections(bit, batch) + request := make(chan *Retrieval) + + select { + case <-s.quit: + // Session terminating, we can't meaningfully service, abort + s.DeliverSections(bit, sections, make([][]byte, len(sections))) + return + + case mux <- request: + // Retrieval accepted, something must arrive before we're aborting + request <- &Retrieval{Bit: bit, Sections: sections} + + result := <-request + s.DeliverSections(result.Bit, result.Sections, result.Bitsets) + } + } } diff --git a/core/bloombits/matcher_test.go b/core/bloombits/matcher_test.go index bef1491b8..fc49b43b8 100644 --- a/core/bloombits/matcher_test.go +++ b/core/bloombits/matcher_test.go @@ -13,6 +13,7 @@ // // You should have received a copy of the GNU Lesser General Public License // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + package bloombits import ( @@ -20,177 +21,219 @@ import ( "sync/atomic" "testing" "time" - - "github.com/ethereum/go-ethereum/core/types" ) const testSectionSize = 4096 -func matcherTestVector(b uint, s uint64) []byte { - r := make([]byte, testSectionSize/8) - for i, _ := range r { - var bb byte - for bit := 0; bit < 8; bit++ { - blockIdx := s*testSectionSize + uint64(i*8+bit) - bb += bb - if (blockIdx % uint64(b)) == 0 { - bb++ - } - } - r[i] = bb - } - return r +// Tests the matcher pipeline on a single continuous workflow without interrupts. +func TestMatcherContinuous(t *testing.T) { + testMatcherDiffBatches(t, [][]bloomIndexes{{{10, 20, 30}}}, 100000, false, 75) + testMatcherDiffBatches(t, [][]bloomIndexes{{{32, 3125, 100}}, {{40, 50, 10}}}, 100000, false, 81) + testMatcherDiffBatches(t, [][]bloomIndexes{{{4, 8, 11}, {7, 8, 17}}, {{9, 9, 12}, {15, 20, 13}}, {{18, 15, 15}, {12, 10, 4}}}, 10000, false, 36) } -func expMatch1(idxs types.BloomIndexList, i uint64) bool { - for _, ii := range idxs { - if (i % uint64(ii)) != 0 { - return false - } - } - return true +// Tests the matcher pipeline on a constantly interrupted and resumed work pattern +// with the aim of ensuring data items are requested only once. +func TestMatcherIntermittent(t *testing.T) { + testMatcherDiffBatches(t, [][]bloomIndexes{{{10, 20, 30}}}, 100000, true, 75) + testMatcherDiffBatches(t, [][]bloomIndexes{{{32, 3125, 100}}, {{40, 50, 10}}}, 100000, true, 81) + testMatcherDiffBatches(t, [][]bloomIndexes{{{4, 8, 11}, {7, 8, 17}}, {{9, 9, 12}, {15, 20, 13}}, {{18, 15, 15}, {12, 10, 4}}}, 10000, true, 36) } -func expMatch2(idxs []types.BloomIndexList, i uint64) bool { - for _, ii := range idxs { - if expMatch1(ii, i) { - return true - } +// Tests the matcher pipeline on random input to hopefully catch anomalies. +func TestMatcherRandom(t *testing.T) { + for i := 0; i < 10; i++ { + testMatcherBothModes(t, makeRandomIndexes([]int{1}, 50), 10000, 0) + testMatcherBothModes(t, makeRandomIndexes([]int{3}, 50), 10000, 0) + testMatcherBothModes(t, makeRandomIndexes([]int{2, 2, 2}, 20), 10000, 0) + testMatcherBothModes(t, makeRandomIndexes([]int{5, 5, 5}, 50), 10000, 0) + testMatcherBothModes(t, makeRandomIndexes([]int{4, 4, 4}, 20), 10000, 0) } - return false } -func expMatch3(idxs [][]types.BloomIndexList, i uint64) bool { - for _, ii := range idxs { - if !expMatch2(ii, i) { - return false +// makeRandomIndexes generates a random filter system, composed on multiple filter +// criteria, each having one bloom list component for the address and arbitrarilly +// many topic bloom list components. +func makeRandomIndexes(lengths []int, max int) [][]bloomIndexes { + res := make([][]bloomIndexes, len(lengths)) + for i, topics := range lengths { + res[i] = make([]bloomIndexes, topics) + for j := 0; j < topics; j++ { + for k := 0; k < len(res[i][j]); k++ { + res[i][j][k] = uint(rand.Intn(max-1) + 2) + } } } - return true + return res } -func testServeMatcher(m *Matcher, stop chan struct{}, cnt *uint32, maxRequestLen int) { - // serve matcher with test vectors - for i := 0; i < 10; i++ { - go func() { - for { - select { - case <-stop: - return - default: - } - b, ok := m.AllocSectionQueue() - if !ok { - return - } - if m.SectionCount(b) < maxRequestLen { - time.Sleep(time.Microsecond * 100) - } - s := m.FetchSections(b, maxRequestLen) - res := make([][]byte, len(s)) - for i, ss := range s { - res[i] = matcherTestVector(b, ss) - atomic.AddUint32(cnt, 1) - } - m.Deliver(b, s, res) - } - }() +// testMatcherDiffBatches runs the given matches test in single-delivery and also +// in batches delivery mode, verifying that all kinds of deliveries are handled +// correctly withn. +func testMatcherDiffBatches(t *testing.T, filter [][]bloomIndexes, blocks uint64, intermittent bool, retrievals uint32) { + singleton := testMatcher(t, filter, blocks, intermittent, retrievals, 1) + batched := testMatcher(t, filter, blocks, intermittent, retrievals, 16) + + if singleton != batched { + t.Errorf("filter = %v blocks = %v intermittent = %v: request count mismatch, %v in signleton vs. %v in batched mode", filter, blocks, intermittent, singleton, batched) } } -func testMatcher(t *testing.T, idxs [][]types.BloomIndexList, cnt uint64, stopOnMatches bool, expCount uint32) uint32 { - count1 := testMatcherWithReqCount(t, idxs, cnt, stopOnMatches, expCount, 1) - count16 := testMatcherWithReqCount(t, idxs, cnt, stopOnMatches, expCount, 16) - if count1 != count16 { - t.Errorf("Error matching idxs = %v count = %v stopOnMatches = %v: request count mismatch, %v with maxReqCount = 1 vs. %v with maxReqCount = 16", idxs, cnt, stopOnMatches, count1, count16) +// testMatcherBothModes runs the given matcher test in both continuous as well as +// in intermittent mode, verifying that the request counts match each other. +func testMatcherBothModes(t *testing.T, filter [][]bloomIndexes, blocks uint64, retrievals uint32) { + continuous := testMatcher(t, filter, blocks, false, retrievals, 16) + intermittent := testMatcher(t, filter, blocks, true, retrievals, 16) + + if continuous != intermittent { + t.Errorf("filter = %v blocks = %v: request count mismatch, %v in continuous vs. %v in intermittent mode", filter, blocks, continuous, intermittent) } - return count1 } -func testMatcherWithReqCount(t *testing.T, idxs [][]types.BloomIndexList, cnt uint64, stopOnMatches bool, expCount uint32, maxReqCount int) uint32 { - m := NewMatcher(testSectionSize, nil, nil) +// testMatcher is a generic tester to run the given matcher test and return the +// number of requests made for cross validation between different modes. +func testMatcher(t *testing.T, filter [][]bloomIndexes, blocks uint64, intermittent bool, retrievals uint32, maxReqCount int) uint32 { + // Create a new matcher an simulate our explicit random bitsets + matcher := NewMatcher(testSectionSize, nil, nil) - for _, idxss := range idxs { - for _, idxs := range idxss { - for _, idx := range idxs { - m.newFetcher(idx) + matcher.addresses = filter[0] + matcher.topics = filter[1:] + + for _, rule := range filter { + for _, topic := range rule { + for _, bit := range topic { + matcher.addScheduler(bit) } } } + // Track the number of retrieval requests made + var requested uint32 - m.addresses = idxs[0] - m.topics = idxs[1:] - var reqCount uint32 + // Start the matching session for the filter and the retriver goroutines + quit := make(chan struct{}) + matches := make(chan uint64, 16) - stop := make(chan struct{}) - chn := m.Start(0, cnt-1) - testServeMatcher(m, stop, &reqCount, maxReqCount) + session, err := matcher.Start(0, blocks-1, matches) + if err != nil { + t.Fatalf("failed to stat matcher session: %v", err) + } + startRetrievers(session, quit, &requested, maxReqCount) - for i := uint64(0); i < cnt; i++ { - if expMatch3(idxs, i) { - match, ok := <-chn + // Iterate over all the blocks and verify that the pipeline produces the correct matches + for i := uint64(0); i < blocks; i++ { + if expMatch3(filter, i) { + match, ok := <-matches if !ok { - t.Errorf("Error matching idxs = %v count = %v stopOnMatches = %v: expected #%v, results channel closed", idxs, cnt, stopOnMatches, i) + t.Errorf("filter = %v blocks = %v intermittent = %v: expected #%v, results channel closed", filter, blocks, intermittent, i) return 0 } if match != i { - t.Errorf("Error matching idxs = %v count = %v stopOnMatches = %v: expected #%v, got #%v", idxs, cnt, stopOnMatches, i, match) + t.Errorf("filter = %v blocks = %v intermittent = %v: expected #%v, got #%v", filter, blocks, intermittent, i, match) } - if stopOnMatches { - m.Stop() - close(stop) - stop = make(chan struct{}) - chn = m.Start(i+1, cnt-1) - testServeMatcher(m, stop, &reqCount, maxReqCount) + // If we're testing intermittent mode, abort and restart the pipeline + if intermittent { + session.Close(time.Second) + close(quit) + + quit = make(chan struct{}) + matches = make(chan uint64, 16) + + session, err = matcher.Start(i+1, blocks-1, matches) + if err != nil { + t.Fatalf("failed to stat matcher session: %v", err) + } + startRetrievers(session, quit, &requested, maxReqCount) } } } - match, ok := <-chn + // Ensure the result channel is torn down after the last block + match, ok := <-matches if ok { - t.Errorf("Error matching idxs = %v count = %v stopOnMatches = %v: expected closed channel, got #%v", idxs, cnt, stopOnMatches, match) + t.Errorf("filter = %v blocks = %v intermittent = %v: expected closed channel, got #%v", filter, blocks, intermittent, match) } - m.Stop() - close(stop) + // Clean up the session and ensure we match the expected retrieval count + session.Close(time.Second) + close(quit) - if expCount != 0 && expCount != reqCount { - t.Errorf("Error matching idxs = %v count = %v stopOnMatches = %v: request count mismatch, expected #%v, got #%v", idxs, cnt, stopOnMatches, expCount, reqCount) + if retrievals != 0 && requested != retrievals { + t.Errorf("filter = %v blocks = %v intermittent = %v: request count mismatch, have #%v, want #%v", filter, blocks, intermittent, requested, retrievals) } + return requested +} + +// startRetrievers starts a batch of goroutines listening for section requests +// and serving them. +func startRetrievers(session *MatcherSession, quit chan struct{}, retrievals *uint32, batch int) { + requests := make(chan chan *Retrieval) + + for i := 0; i < 10; i++ { + // Start a multiplexer to test multiple threaded execution + go session.Multiplex(batch, 100*time.Microsecond, requests) - return reqCount + // Start a services to match the above multiplexer + go func() { + for { + // Wait for a service request or a shutdown + select { + case <-quit: + return + + case request := <-requests: + task := <-request + + task.Bitsets = make([][]byte, len(task.Sections)) + for i, section := range task.Sections { + if rand.Int()%4 != 0 { // Handle occasional missing deliveries + task.Bitsets[i] = generateBitset(task.Bit, section) + atomic.AddUint32(retrievals, 1) + } + } + request <- task + } + } + }() + } } -func testRandomIdxs(l []int, max int) [][]types.BloomIndexList { - res := make([][]types.BloomIndexList, len(l)) - for i, ll := range l { - res[i] = make([]types.BloomIndexList, ll) - for j, _ := range res[i] { - for k, _ := range res[i][j] { - res[i][j][k] = uint(rand.Intn(max-1) + 2) +// generateBitset generates the rotated bitset for the given bloom bit and section +// numbers. +func generateBitset(bit uint, section uint64) []byte { + bitset := make([]byte, testSectionSize/8) + for i := 0; i < len(bitset); i++ { + for b := 0; b < 8; b++ { + blockIdx := section*testSectionSize + uint64(i*8+b) + bitset[i] += bitset[i] + if (blockIdx % uint64(bit)) == 0 { + bitset[i]++ } } } - return res + return bitset } -func TestMatcher(t *testing.T) { - testMatcher(t, [][]types.BloomIndexList{{{10, 20, 30}}}, 100000, false, 75) - testMatcher(t, [][]types.BloomIndexList{{{32, 3125, 100}}, {{40, 50, 10}}}, 100000, false, 81) - testMatcher(t, [][]types.BloomIndexList{{{4, 8, 11}, {7, 8, 17}}, {{9, 9, 12}, {15, 20, 13}}, {{18, 15, 15}, {12, 10, 4}}}, 10000, false, 36) +func expMatch1(filter bloomIndexes, i uint64) bool { + for _, ii := range filter { + if (i % uint64(ii)) != 0 { + return false + } + } + return true } -func TestMatcherStopOnMatches(t *testing.T) { - testMatcher(t, [][]types.BloomIndexList{{{10, 20, 30}}}, 100000, true, 75) - testMatcher(t, [][]types.BloomIndexList{{{4, 8, 11}, {7, 8, 17}}, {{9, 9, 12}, {15, 20, 13}}, {{18, 15, 15}, {12, 10, 4}}}, 10000, true, 36) +func expMatch2(filter []bloomIndexes, i uint64) bool { + for _, ii := range filter { + if expMatch1(ii, i) { + return true + } + } + return false } -func TestMatcherRandom(t *testing.T) { - for i := 0; i < 20; i++ { - testMatcher(t, testRandomIdxs([]int{1}, 50), 100000, false, 0) - testMatcher(t, testRandomIdxs([]int{3}, 50), 100000, false, 0) - testMatcher(t, testRandomIdxs([]int{2, 2, 2}, 20), 100000, false, 0) - testMatcher(t, testRandomIdxs([]int{5, 5, 5}, 50), 100000, false, 0) - idxs := testRandomIdxs([]int{2, 2, 2}, 20) - reqCount := testMatcher(t, idxs, 10000, false, 0) - testMatcher(t, idxs, 10000, true, reqCount) +func expMatch3(filter [][]bloomIndexes, i uint64) bool { + for _, ii := range filter { + if !expMatch2(ii, i) { + return false + } } + return true } diff --git a/core/bloombits/scheduler.go b/core/bloombits/scheduler.go new file mode 100644 index 000000000..6449c7465 --- /dev/null +++ b/core/bloombits/scheduler.go @@ -0,0 +1,181 @@ +// Copyright 2017 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package bloombits + +import ( + "sync" +) + +// request represents a bloom retrieval task to prioritize and pull from the local +// database or remotely from the network. +type request struct { + section uint64 // Section index to retrieve the a bit-vector from + bit uint // Bit index within the section to retrieve the vector of +} + +// response represents the state of a requested bit-vector through a scheduler. +type response struct { + cached []byte // Cached bits to dedup multiple requests + done chan struct{} // Channel to allow waiting for completion +} + +// scheduler handles the scheduling of bloom-filter retrieval operations for +// entire section-batches belonging to a single bloom bit. Beside scheduling the +// retrieval operations, this struct also deduplicates the requests and caches +// the results to minimize network/database overhead even in complex filtering +// scenarios. +type scheduler struct { + bit uint // Index of the bit in the bloom filter this scheduler is responsible for + responses map[uint64]*response // Currently pending retrieval requests or already cached responses + lock sync.Mutex // Lock protecting the responses from concurrent access +} + +// newScheduler creates a new bloom-filter retrieval scheduler for a specific +// bit index. +func newScheduler(idx uint) *scheduler { + return &scheduler{ + bit: idx, + responses: make(map[uint64]*response), + } +} + +// run creates a retrieval pipeline, receiving section indexes from sections and +// returning the results in the same order through the done channel. Concurrent +// runs of the same scheduler are allowed, leading to retrieval task deduplication. +func (s *scheduler) run(sections chan uint64, dist chan *request, done chan []byte, quit chan struct{}, wg *sync.WaitGroup) { + // Create a forwarder channel between requests and responses of the same size as + // the distribution channel (since that will block the pipeline anyway). + pend := make(chan uint64, cap(dist)) + + // Start the pipeline schedulers to forward between user -> distributor -> user + wg.Add(2) + go s.scheduleRequests(sections, dist, pend, quit, wg) + go s.scheduleDeliveries(pend, done, quit, wg) +} + +// reset cleans up any leftovers from previous runs. This is required before a +// restart to ensure the no previously requested but never delivered state will +// cause a lockup. +func (s *scheduler) reset() { + s.lock.Lock() + defer s.lock.Unlock() + + for section, res := range s.responses { + if res.cached == nil { + delete(s.responses, section) + } + } +} + +// scheduleRequests reads section retrieval requests from the input channel, +// deduplicates the stream and pushes unique retrieval tasks into the distribution +// channel for a database or network layer to honour. +func (s *scheduler) scheduleRequests(reqs chan uint64, dist chan *request, pend chan uint64, quit chan struct{}, wg *sync.WaitGroup) { + // Clean up the goroutine and pipeline when done + defer wg.Done() + defer close(pend) + + // Keep reading and scheduling section requests + for { + select { + case <-quit: + return + + case section, ok := <-reqs: + // New section retrieval requested + if !ok { + return + } + // Deduplicate retrieval requests + unique := false + + s.lock.Lock() + if s.responses[section] == nil { + s.responses[section] = &response{ + done: make(chan struct{}), + } + unique = true + } + s.lock.Unlock() + + // Schedule the section for retrieval and notify the deliverer to expect this section + if unique { + select { + case <-quit: + return + case dist <- &request{bit: s.bit, section: section}: + } + } + select { + case <-quit: + return + case pend <- section: + } + } + } +} + +// scheduleDeliveries reads section acceptance notifications and waits for them +// to be delivered, pushing them into the output data buffer. +func (s *scheduler) scheduleDeliveries(pend chan uint64, done chan []byte, quit chan struct{}, wg *sync.WaitGroup) { + // Clean up the goroutine and pipeline when done + defer wg.Done() + defer close(done) + + // Keep reading notifications and scheduling deliveries + for { + select { + case <-quit: + return + + case idx, ok := <-pend: + // New section retrieval pending + if !ok { + return + } + // Wait until the request is honoured + s.lock.Lock() + res := s.responses[idx] + s.lock.Unlock() + + select { + case <-quit: + return + case <-res.done: + } + // Deliver the result + select { + case <-quit: + return + case done <- res.cached: + } + } + } +} + +// deliver is called by the request distributor when a reply to a request arrives. +func (s *scheduler) deliver(sections []uint64, data [][]byte) { + s.lock.Lock() + defer s.lock.Unlock() + + for i, section := range sections { + if res := s.responses[section]; res != nil && res.cached == nil { // Avoid non-requests and double deliveries + res.cached = data[i] + close(res.done) + } + } +} diff --git a/core/bloombits/scheduler_test.go b/core/bloombits/scheduler_test.go new file mode 100644 index 000000000..8a159c237 --- /dev/null +++ b/core/bloombits/scheduler_test.go @@ -0,0 +1,105 @@ +// Copyright 2017 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package bloombits + +import ( + "bytes" + "math/big" + "math/rand" + "sync" + "sync/atomic" + "testing" + "time" +) + +// Tests that the scheduler can deduplicate and forward retrieval requests to +// underlying fetchers and serve responses back, irrelevant of the concurrency +// of the requesting clients or serving data fetchers. +func TestSchedulerSingleClientSingleFetcher(t *testing.T) { testScheduler(t, 1, 1, 5000) } +func TestSchedulerSingleClientMultiFetcher(t *testing.T) { testScheduler(t, 1, 10, 5000) } +func TestSchedulerMultiClientSingleFetcher(t *testing.T) { testScheduler(t, 10, 1, 5000) } +func TestSchedulerMultiClientMultiFetcher(t *testing.T) { testScheduler(t, 10, 10, 5000) } + +func testScheduler(t *testing.T, clients int, fetchers int, requests int) { + f := newScheduler(0) + + // Create a batch of handler goroutines that respond to bloom bit requests and + // deliver them to the scheduler. + var fetchPend sync.WaitGroup + fetchPend.Add(fetchers) + defer fetchPend.Wait() + + fetch := make(chan *request, 16) + defer close(fetch) + + var delivered uint32 + for i := 0; i < fetchers; i++ { + go func() { + defer fetchPend.Done() + + for req := range fetch { + time.Sleep(time.Duration(rand.Intn(int(100 * time.Microsecond)))) + atomic.AddUint32(&delivered, 1) + + f.deliver([]uint64{ + req.section + uint64(requests), // Non-requested data (ensure it doesn't go out of bounds) + req.section, // Requested data + req.section, // Duplicated data (ensure it doesn't double close anything) + }, [][]byte{ + []byte{}, + new(big.Int).SetUint64(req.section).Bytes(), + new(big.Int).SetUint64(req.section).Bytes(), + }) + } + }() + } + // Start a batch of goroutines to concurrently run scheduling tasks + quit := make(chan struct{}) + + var pend sync.WaitGroup + pend.Add(clients) + + for i := 0; i < clients; i++ { + go func() { + defer pend.Done() + + in := make(chan uint64, 16) + out := make(chan []byte, 16) + + f.run(in, fetch, out, quit, &pend) + + go func() { + for j := 0; j < requests; j++ { + in <- uint64(j) + } + close(in) + }() + + for j := 0; j < requests; j++ { + bits := <-out + if want := new(big.Int).SetUint64(uint64(j)).Bytes(); !bytes.Equal(bits, want) { + t.Errorf("vector %d: delivered content mismatch: have %x, want %x", j, bits, want) + } + } + }() + } + pend.Wait() + + if have := atomic.LoadUint32(&delivered); int(have) != requests { + t.Errorf("request count mismatch: have %v, want %v", have, requests) + } +} diff --git a/core/bloombits/utils.go b/core/bloombits/utils.go deleted file mode 100644 index d0755cb65..000000000 --- a/core/bloombits/utils.go +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2017 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. -package bloombits - -import ( - "github.com/ethereum/go-ethereum/core/types" -) - -const BloomLength = 2048 - -// BloomBitsCreator takes SectionSize number of header bloom filters and calculates the bloomBits vectors of the section -type BloomBitsCreator struct { - blooms [BloomLength][]byte - sectionSize, bitIndex uint64 -} - -func NewBloomBitsCreator(sectionSize uint64) *BloomBitsCreator { - b := &BloomBitsCreator{sectionSize: sectionSize} - for i, _ := range b.blooms { - b.blooms[i] = make([]byte, sectionSize/8) - } - return b -} - -// AddHeaderBloom takes a single bloom filter and sets the corresponding bit column in memory accordingly -func (b *BloomBitsCreator) AddHeaderBloom(bloom types.Bloom) { - if b.bitIndex >= b.sectionSize { - panic("too many header blooms added") - } - - byteIdx := b.bitIndex / 8 - bitMask := byte(1) << byte(7-b.bitIndex%8) - for bloomBitIdx, _ := range b.blooms { - bloomByteIdx := BloomLength/8 - 1 - bloomBitIdx/8 - bloomBitMask := byte(1) << byte(bloomBitIdx%8) - if (bloom[bloomByteIdx] & bloomBitMask) != 0 { - b.blooms[bloomBitIdx][byteIdx] |= bitMask - } - } - b.bitIndex++ -} - -// GetBitVector returns the bit vector belonging to the given bit index after header blooms have been added -func (b *BloomBitsCreator) GetBitVector(idx uint) []byte { - if b.bitIndex != b.sectionSize { - panic("not enough header blooms added") - } - - return b.blooms[idx][:] -} |