From b35aa21f9f8561c09cfe4d3b0548eda6cff8604e Mon Sep 17 00:00:00 2001 From: Nick Johnson Date: Thu, 13 Apr 2017 10:14:19 +0100 Subject: trie: implement unionIterator (#14312) --- trie/iterator.go | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 126 insertions(+), 12 deletions(-) (limited to 'trie/iterator.go') diff --git a/trie/iterator.go b/trie/iterator.go index ddc674d2b..42149a7d3 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -18,7 +18,7 @@ package trie import ( "bytes" - + "container/heap" "github.com/ethereum/go-ethereum/common" ) @@ -268,6 +268,26 @@ outer: return nil } +func compareNodes(a, b NodeIterator) int { + cmp := bytes.Compare(a.Path(), b.Path()) + if cmp != 0 { + return cmp + } + + if a.Leaf() && !b.Leaf() { + return -1 + } else if b.Leaf() && !a.Leaf() { + return 1 + } + + cmp = bytes.Compare(a.Hash().Bytes(), b.Hash().Bytes()) + if cmp != 0 { + return cmp + } + + return bytes.Compare(a.LeafBlob(), b.LeafBlob()) +} + type differenceIterator struct { a, b NodeIterator // Nodes returned are those in b - a. eof bool // Indicates a has run out of elements @@ -321,8 +341,7 @@ func (it *differenceIterator) Next(bool) bool { } for { - apath, bpath := it.a.Path(), it.b.Path() - switch bytes.Compare(apath, bpath) { + switch compareNodes(it.a, it.b) { case -1: // b jumped past a; advance a if !it.a.Next(true) { @@ -334,15 +353,6 @@ func (it *differenceIterator) Next(bool) bool { // b is before a return true case 0: - if it.a.Hash() != it.b.Hash() || it.a.Leaf() != it.b.Leaf() { - // Keys are identical, but hashes or leaf status differs - return true - } - if it.a.Leaf() && it.b.Leaf() && !bytes.Equal(it.a.LeafBlob(), it.b.LeafBlob()) { - // Both are leaf nodes, but with different values - return true - } - // a and b are identical; skip this whole subtree if the nodes have hashes hasHash := it.a.Hash() == common.Hash{} if !it.b.Next(hasHash) { @@ -364,3 +374,107 @@ func (it *differenceIterator) Error() error { } return it.b.Error() } + +type nodeIteratorHeap []NodeIterator + +func (h nodeIteratorHeap) Len() int { return len(h) } +func (h nodeIteratorHeap) Less(i, j int) bool { return compareNodes(h[i], h[j]) < 0 } +func (h nodeIteratorHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } +func (h *nodeIteratorHeap) Push(x interface{}) { *h = append(*h, x.(NodeIterator)) } +func (h *nodeIteratorHeap) Pop() interface{} { + n := len(*h) + x := (*h)[n-1] + *h = (*h)[0 : n-1] + return x +} + +type unionIterator struct { + items *nodeIteratorHeap // Nodes returned are the union of the ones in these iterators + count int // Number of nodes scanned across all tries + err error // The error, if one has been encountered +} + +// NewUnionIterator constructs a NodeIterator that iterates over elements in the union +// of the provided NodeIterators. Returns the iterator, and a pointer to an integer +// recording the number of nodes visited. +func NewUnionIterator(iters []NodeIterator) (NodeIterator, *int) { + h := make(nodeIteratorHeap, len(iters)) + copy(h, iters) + heap.Init(&h) + + ui := &unionIterator{ + items: &h, + } + return ui, &ui.count +} + +func (it *unionIterator) Hash() common.Hash { + return (*it.items)[0].Hash() +} + +func (it *unionIterator) Parent() common.Hash { + return (*it.items)[0].Parent() +} + +func (it *unionIterator) Leaf() bool { + return (*it.items)[0].Leaf() +} + +func (it *unionIterator) LeafBlob() []byte { + return (*it.items)[0].LeafBlob() +} + +func (it *unionIterator) Path() []byte { + return (*it.items)[0].Path() +} + +// Next returns the next node in the union of tries being iterated over. +// +// It does this by maintaining a heap of iterators, sorted by the iteration +// order of their next elements, with one entry for each source trie. Each +// time Next() is called, it takes the least element from the heap to return, +// advancing any other iterators that also point to that same element. These +// iterators are called with descend=false, since we know that any nodes under +// these nodes will also be duplicates, found in the currently selected iterator. +// Whenever an iterator is advanced, it is pushed back into the heap if it still +// has elements remaining. +// +// In the case that descend=false - eg, we're asked to ignore all subnodes of the +// current node - we also advance any iterators in the heap that have the current +// path as a prefix. +func (it *unionIterator) Next(descend bool) bool { + if len(*it.items) == 0 { + return false + } + + // Get the next key from the union + least := heap.Pop(it.items).(NodeIterator) + + // Skip over other nodes as long as they're identical, or, if we're not descending, as + // long as they have the same prefix as the current node. + for len(*it.items) > 0 && ((!descend && bytes.HasPrefix((*it.items)[0].Path(), least.Path())) || compareNodes(least, (*it.items)[0]) == 0) { + skipped := heap.Pop(it.items).(NodeIterator) + // Skip the whole subtree if the nodes have hashes; otherwise just skip this node + if skipped.Next(skipped.Hash() == common.Hash{}) { + it.count += 1 + // If there are more elements, push the iterator back on the heap + heap.Push(it.items, skipped) + } + } + + if least.Next(descend) { + it.count += 1 + heap.Push(it.items, least) + } + + return len(*it.items) > 0 +} + +func (it *unionIterator) Error() error { + for i := 0; i < len(*it.items); i++ { + if err := (*it.items)[i].Error(); err != nil { + return err + } + } + return nil +} -- cgit