diff options
Diffstat (limited to 'trie/hasher.go')
-rw-r--r-- | trie/hasher.go | 156 |
1 file changed, 69 insertions, 87 deletions
diff --git a/trie/hasher.go b/trie/hasher.go index 5186d7669..2fc44787a 100644 --- a/trie/hasher.go +++ b/trie/hasher.go @@ -26,51 +26,34 @@ import ( "github.com/ethereum/go-ethereum/rlp" ) -// calculator is a utility used by the hasher to calculate the hash value of the tree node. -type calculator struct { - sha hash.Hash - buffer *bytes.Buffer -} - -// calculatorPool is a set of temporary calculators that may be individually saved and retrieved. -var calculatorPool = sync.Pool{ - New: func() interface{} { - return &calculator{buffer: new(bytes.Buffer), sha: sha3.NewKeccak256()} - }, -} - -// hasher hasher is used to calculate the hash value of the whole tree. type hasher struct { + tmp *bytes.Buffer + sha hash.Hash cachegen uint16 cachelimit uint16 - threaded bool - mu sync.Mutex + onleaf LeafCallback } -func newHasher(cachegen, cachelimit uint16) *hasher { - h := &hasher{ - cachegen: cachegen, - cachelimit: cachelimit, - } - return h +// hashers live in a global db. +var hasherPool = sync.Pool{ + New: func() interface{} { + return &hasher{tmp: new(bytes.Buffer), sha: sha3.NewKeccak256()} + }, } -// newCalculator retrieves a cleaned calculator from calculator pool. -func (h *hasher) newCalculator() *calculator { - calculator := calculatorPool.Get().(*calculator) - calculator.buffer.Reset() - calculator.sha.Reset() - return calculator +func newHasher(cachegen, cachelimit uint16, onleaf LeafCallback) *hasher { + h := hasherPool.Get().(*hasher) + h.cachegen, h.cachelimit, h.onleaf = cachegen, cachelimit, onleaf + return h } -// returnCalculator returns a no longer used calculator to the pool. -func (h *hasher) returnCalculator(calculator *calculator) { - calculatorPool.Put(calculator) +func returnHasherToPool(h *hasher) { + hasherPool.Put(h) } // hash collapses a node down into a hash node, also returning a copy of the // original node initialized with the computed hash to replace the original one. 
-func (h *hasher) hash(n node, db DatabaseWriter, force bool) (node, node, error) { +func (h *hasher) hash(n node, db *Database, force bool) (node, node, error) { // If we're not storing the node, just hashing, use available cached data if hash, dirty := n.cache(); hash != nil { if db == nil { @@ -117,7 +100,7 @@ func (h *hasher) hash(n node, db DatabaseWriter, force bool) (node, node, error) // hashChildren replaces the children of a node with their hashes if the encoded // size of the child is larger than a hash, returning the collapsed node as well // as a replacement for the original node with the child hashes cached in. -func (h *hasher) hashChildren(original node, db DatabaseWriter) (node, node, error) { +func (h *hasher) hashChildren(original node, db *Database) (node, node, error) { var err error switch n := original.(type) { @@ -142,49 +125,16 @@ func (h *hasher) hashChildren(original node, db DatabaseWriter) (node, node, err // Hash the full node's children, caching the newly hashed subtrees collapsed, cached := n.copy(), n.copy() - // hashChild is a helper to hash a single child, which is called either on the - // same thread as the caller or in a goroutine for the toplevel branching. - hashChild := func(index int, wg *sync.WaitGroup) { - if wg != nil { - defer wg.Done() - } - // Ensure that nil children are encoded as empty strings. 
- if collapsed.Children[index] == nil { - collapsed.Children[index] = valueNode(nil) - return - } - // Hash all other children properly - var herr error - collapsed.Children[index], cached.Children[index], herr = h.hash(n.Children[index], db, false) - if herr != nil { - h.mu.Lock() // rarely if ever locked, no congenstion - err = herr - h.mu.Unlock() + for i := 0; i < 16; i++ { + if n.Children[i] != nil { + collapsed.Children[i], cached.Children[i], err = h.hash(n.Children[i], db, false) + if err != nil { + return original, original, err + } + } else { + collapsed.Children[i] = valueNode(nil) // Ensure that nil children are encoded as empty strings. } } - // If we're not running in threaded mode yet, span a goroutine for each child - if !h.threaded { - // Disable further threading - h.threaded = true - - // Hash all the children concurrently - var wg sync.WaitGroup - for i := 0; i < 16; i++ { - wg.Add(1) - go hashChild(i, &wg) - } - wg.Wait() - - // Reenable threading for subsequent hash calls - h.threaded = false - } else { - for i := 0; i < 16; i++ { - hashChild(i, nil) - } - } - if err != nil { - return original, original, err - } cached.Children[16] = n.Children[16] if collapsed.Children[16] == nil { collapsed.Children[16] = valueNode(nil) @@ -197,34 +147,66 @@ func (h *hasher) hashChildren(original node, db DatabaseWriter) (node, node, err } } -func (h *hasher) store(n node, db DatabaseWriter, force bool) (node, error) { +// store hashes the node n and if we have a storage layer specified, it writes +// the key/value pair to it and tracks any node->child references as well as any +// node->external trie references. +func (h *hasher) store(n node, db *Database, force bool) (node, error) { // Don't store hashes or empty nodes. 
if _, isHash := n.(hashNode); n == nil || isHash { return n, nil } - calculator := h.newCalculator() - defer h.returnCalculator(calculator) - // Generate the RLP encoding of the node - if err := rlp.Encode(calculator.buffer, n); err != nil { + h.tmp.Reset() + if err := rlp.Encode(h.tmp, n); err != nil { panic("encode error: " + err.Error()) } - if calculator.buffer.Len() < 32 && !force { + if h.tmp.Len() < 32 && !force { return n, nil // Nodes smaller than 32 bytes are stored inside their parent } // Larger nodes are replaced by their hash and stored in the database. hash, _ := n.cache() if hash == nil { - calculator.sha.Write(calculator.buffer.Bytes()) - hash = hashNode(calculator.sha.Sum(nil)) + h.sha.Reset() + h.sha.Write(h.tmp.Bytes()) + hash = hashNode(h.sha.Sum(nil)) } if db != nil { - // db might be a leveldb batch, which is not safe for concurrent writes - h.mu.Lock() - err := db.Put(hash, calculator.buffer.Bytes()) - h.mu.Unlock() + // We are pooling the trie nodes into an intermediate memory cache + db.lock.Lock() - return hash, err + hash := common.BytesToHash(hash) + db.insert(hash, h.tmp.Bytes()) + + // Track all direct parent->child node references + switch n := n.(type) { + case *shortNode: + if child, ok := n.Val.(hashNode); ok { + db.reference(common.BytesToHash(child), hash) + } + case *fullNode: + for i := 0; i < 16; i++ { + if child, ok := n.Children[i].(hashNode); ok { + db.reference(common.BytesToHash(child), hash) + } + } + } + db.lock.Unlock() + + // Track external references from account->storage trie + if h.onleaf != nil { + switch n := n.(type) { + case *shortNode: + if child, ok := n.Val.(valueNode); ok { + h.onleaf(child, hash) + } + case *fullNode: + for i := 0; i < 16; i++ { + if child, ok := n.Children[i].(valueNode); ok { + h.onleaf(child, hash) + } + } + } + } } return hash, nil } |