From ab27bee25a845be90bd60e774ff68d2ea1501772 Mon Sep 17 00:00:00 2001 From: Péter Szilágyi Date: Mon, 5 Oct 2015 19:37:56 +0300 Subject: core, eth, trie: direct state trie synchronization --- core/state/sync.go | 98 ++++++++++++++++++++ core/state/sync_test.go | 238 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 336 insertions(+) create mode 100644 core/state/sync.go create mode 100644 core/state/sync_test.go (limited to 'core/state') diff --git a/core/state/sync.go b/core/state/sync.go new file mode 100644 index 000000000..e9bebe8ee --- /dev/null +++ b/core/state/sync.go @@ -0,0 +1,98 @@ +// Copyright 2015 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package state + +import ( + "bytes" + "math/big" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto/sha3" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" +) + +type StateSync struct { + db ethdb.Database + sync *trie.TrieSync + codeReqs map[common.Hash]struct{} // requested but not yet written to database + codeReqList []common.Hash // requested since last Missing +} + +var sha3_nil = common.BytesToHash(sha3.NewKeccak256().Sum(nil)) + +func NewStateSync(root common.Hash, db ethdb.Database) *StateSync { + ss := &StateSync{ + db: db, + codeReqs: make(map[common.Hash]struct{}), + } + ss.codeReqs[sha3_nil] = struct{}{} // never request the nil hash + ss.sync = trie.NewTrieSync(root, db, ss.leafFound) + return ss +} + +func (self *StateSync) leafFound(leaf []byte, parent common.Hash) error { + var obj struct { + Nonce uint64 + Balance *big.Int + Root common.Hash + CodeHash []byte + } + if err := rlp.Decode(bytes.NewReader(leaf), &obj); err != nil { + return err + } + self.sync.AddSubTrie(obj.Root, 64, parent, nil) + + codehash := common.BytesToHash(obj.CodeHash) + if _, ok := self.codeReqs[codehash]; !ok { + code, _ := self.db.Get(obj.CodeHash) + if code == nil { + self.codeReqs[codehash] = struct{}{} + self.codeReqList = append(self.codeReqList, codehash) + } + } + return nil +} + +func (self *StateSync) Missing(max int) []common.Hash { + cr := len(self.codeReqList) + gh := 0 + if max != 0 { + if cr > max { + cr = max + } + gh = max - cr + } + list := append(self.sync.Missing(gh), self.codeReqList[:cr]...) + self.codeReqList = self.codeReqList[cr:] + return list +} + +func (self *StateSync) Process(list []trie.SyncResult) error { + for i := 0; i < len(list); i++ { + if _, ok := self.codeReqs[list[i].Hash]; ok { // code data, not a node + self.db.Put(list[i].Hash[:], list[i].Data) + delete(self.codeReqs, list[i].Hash) + list[i] = list[len(list)-1] + list = list[:len(list)-1] + i-- + } + } + _, err := self.sync.Process(list) + return err +} diff --git a/core/state/sync_test.go b/core/state/sync_test.go new file mode 100644 index 000000000..f6afe8bd8 --- /dev/null +++ b/core/state/sync_test.go @@ -0,0 +1,238 @@ +// Copyright 2015 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package state + +import ( + "bytes" + "math/big" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/trie" +) + +// testAccount is the data associated with an account used by the state tests. +type testAccount struct { + address common.Address + balance *big.Int + nonce uint64 + code []byte +} + +// makeTestState create a sample test state to test node-wise reconstruction. +func makeTestState() (ethdb.Database, common.Hash, []*testAccount) { + // Create an empty state + db, _ := ethdb.NewMemDatabase() + state := New(common.Hash{}, db) + + // Fill it with some arbitrary data + accounts := []*testAccount{} + for i := byte(0); i < 255; i++ { + obj := state.GetOrNewStateObject(common.BytesToAddress([]byte{i})) + acc := &testAccount{address: common.BytesToAddress([]byte{i})} + + obj.AddBalance(big.NewInt(int64(11 * i))) + acc.balance = big.NewInt(int64(11 * i)) + + obj.SetNonce(uint64(42 * i)) + acc.nonce = uint64(42 * i) + + if i%3 == 0 { + obj.SetCode([]byte{i, i, i, i, i}) + acc.code = []byte{i, i, i, i, i} + } + state.UpdateStateObject(obj) + accounts = append(accounts, acc) + } + root, _ := state.Commit() + + // Return the generated state + return db, root, accounts +} + +// checkStateAccounts cross references a reconstructed state with an expected +// account array. +func checkStateAccounts(t *testing.T, db ethdb.Database, root common.Hash, accounts []*testAccount) { + state := New(root, db) + for i, acc := range accounts { + + if balance := state.GetBalance(acc.address); balance.Cmp(acc.balance) != 0 { + t.Errorf("account %d: balance mismatch: have %v, want %v", i, balance, acc.balance) + } + if nonce := state.GetNonce(acc.address); nonce != acc.nonce { + t.Errorf("account %d: nonce mismatch: have %v, want %v", i, nonce, acc.nonce) + } + if code := state.GetCode(acc.address); bytes.Compare(code, acc.code) != 0 { + t.Errorf("account %d: code mismatch: have %x, want %x", i, code, acc.code) + } + } +} + +// Tests that an empty state is not scheduled for syncing. +func TestEmptyStateSync(t *testing.T) { + empty := common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421") + db, _ := ethdb.NewMemDatabase() + if req := NewStateSync(empty, db).Missing(1); len(req) != 0 { + t.Errorf("content requested for empty state: %v", req) + } +} + +// Tests that given a root hash, a state can sync iteratively on a single thread, +// requesting retrieval tasks and returning all of them in one go. +func TestIterativeStateSyncIndividual(t *testing.T) { testIterativeStateSync(t, 1) } +func TestIterativeStateSyncBatched(t *testing.T) { testIterativeStateSync(t, 100) } + +func testIterativeStateSync(t *testing.T, batch int) { + // Create a random state to copy + srcDb, srcRoot, srcAccounts := makeTestState() + + // Create a destination state and sync with the scheduler + dstDb, _ := ethdb.NewMemDatabase() + sched := NewStateSync(srcRoot, dstDb) + + queue := append([]common.Hash{}, sched.Missing(batch)...) + for len(queue) > 0 { + results := make([]trie.SyncResult, len(queue)) + for i, hash := range queue { + data, err := srcDb.Get(hash.Bytes()) + if err != nil { + t.Fatalf("failed to retrieve node data for %x: %v", hash, err) + } + results[i] = trie.SyncResult{hash, data} + } + if err := sched.Process(results); err != nil { + t.Fatalf("failed to process results: %v", err) + } + queue = append(queue[:0], sched.Missing(batch)...) + } + // Cross check that the two states are in sync + checkStateAccounts(t, dstDb, srcRoot, srcAccounts) +} + +// Tests that the trie scheduler can correctly reconstruct the state even if only +// partial results are returned, and the others sent only later. +func TestIterativeDelayedStateSync(t *testing.T) { + // Create a random state to copy + srcDb, srcRoot, srcAccounts := makeTestState() + + // Create a destination state and sync with the scheduler + dstDb, _ := ethdb.NewMemDatabase() + sched := NewStateSync(srcRoot, dstDb) + + queue := append([]common.Hash{}, sched.Missing(0)...) + for len(queue) > 0 { + // Sync only half of the scheduled nodes + results := make([]trie.SyncResult, len(queue)/2+1) + for i, hash := range queue[:len(results)] { + data, err := srcDb.Get(hash.Bytes()) + if err != nil { + t.Fatalf("failed to retrieve node data for %x: %v", hash, err) + } + results[i] = trie.SyncResult{hash, data} + } + if err := sched.Process(results); err != nil { + t.Fatalf("failed to process results: %v", err) + } + queue = append(queue[len(results):], sched.Missing(0)...) + } + // Cross check that the two states are in sync + checkStateAccounts(t, dstDb, srcRoot, srcAccounts) +} + +// Tests that given a root hash, a trie can sync iteratively on a single thread, +// requesting retrieval tasks and returning all of them in one go, however in a +// random order. +func TestIterativeRandomStateSyncIndividual(t *testing.T) { testIterativeRandomStateSync(t, 1) } +func TestIterativeRandomStateSyncBatched(t *testing.T) { testIterativeRandomStateSync(t, 100) } + +func testIterativeRandomStateSync(t *testing.T, batch int) { + // Create a random state to copy + srcDb, srcRoot, srcAccounts := makeTestState() + + // Create a destination state and sync with the scheduler + dstDb, _ := ethdb.NewMemDatabase() + sched := NewStateSync(srcRoot, dstDb) + + queue := make(map[common.Hash]struct{}) + for _, hash := range sched.Missing(batch) { + queue[hash] = struct{}{} + } + for len(queue) > 0 { + // Fetch all the queued nodes in a random order + results := make([]trie.SyncResult, 0, len(queue)) + for hash, _ := range queue { + data, err := srcDb.Get(hash.Bytes()) + if err != nil { + t.Fatalf("failed to retrieve node data for %x: %v", hash, err) + } + results = append(results, trie.SyncResult{hash, data}) + } + // Feed the retrieved results back and queue new tasks + if err := sched.Process(results); err != nil { + t.Fatalf("failed to process results: %v", err) + } + queue = make(map[common.Hash]struct{}) + for _, hash := range sched.Missing(batch) { + queue[hash] = struct{}{} + } + } + // Cross check that the two states are in sync + checkStateAccounts(t, dstDb, srcRoot, srcAccounts) +} + +// Tests that the trie scheduler can correctly reconstruct the state even if only +// partial results are returned (Even those randomly), others sent only later. +func TestIterativeRandomDelayedStateSync(t *testing.T) { + // Create a random state to copy + srcDb, srcRoot, srcAccounts := makeTestState() + + // Create a destination state and sync with the scheduler + dstDb, _ := ethdb.NewMemDatabase() + sched := NewStateSync(srcRoot, dstDb) + + queue := make(map[common.Hash]struct{}) + for _, hash := range sched.Missing(0) { + queue[hash] = struct{}{} + } + for len(queue) > 0 { + // Sync only half of the scheduled nodes, even those in random order + results := make([]trie.SyncResult, 0, len(queue)/2+1) + for hash, _ := range queue { + delete(queue, hash) + + data, err := srcDb.Get(hash.Bytes()) + if err != nil { + t.Fatalf("failed to retrieve node data for %x: %v", hash, err) + } + results = append(results, trie.SyncResult{hash, data}) + + if len(results) >= cap(results) { + break + } + } + // Feed the retrieved results back and queue new tasks + if err := sched.Process(results); err != nil { + t.Fatalf("failed to process results: %v", err) + } + for _, hash := range sched.Missing(0) { + queue[hash] = struct{}{} + } + } + // Cross check that the two states are in sync + checkStateAccounts(t, dstDb, srcRoot, srcAccounts) +} -- cgit From b97e34a8e4d06b315cc495819ba6612f89dec54f Mon Sep 17 00:00:00 2001 From: Péter Szilágyi Date: Wed, 7 Oct 2015 12:14:30 +0300 Subject: eth/downloader: concurrent receipt and state processing --- core/state/sync.go | 93 ++++++++++++++++++------------------------------- core/state/sync_test.go | 16 ++++----- 2 files changed, 41 insertions(+), 68 deletions(-) (limited to 'core/state') diff --git a/core/state/sync.go b/core/state/sync.go index e9bebe8ee..5a388886c 100644 --- a/core/state/sync.go +++ b/core/state/sync.go @@ -21,78 +21,51 @@ import ( "math/big" "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/crypto/sha3" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" ) -type StateSync struct { - db ethdb.Database - sync *trie.TrieSync - codeReqs map[common.Hash]struct{} // requested but not yet written to database - codeReqList []common.Hash // requested since last Missing -} +// StateSync is the main state synchronisation scheduler, which provides yet the +// unknown state hashes to retrieve, accepts node data associated with said hashes +// and reconstructs the state database step by step until all is done. +type StateSync trie.TrieSync -var sha3_nil = common.BytesToHash(sha3.NewKeccak256().Sum(nil)) +// NewStateSync create a new state trie download scheduler. +func NewStateSync(root common.Hash, database ethdb.Database) *StateSync { + // Pre-declare the result syncer t + var syncer *trie.TrieSync -func NewStateSync(root common.Hash, db ethdb.Database) *StateSync { - ss := &StateSync{ - db: db, - codeReqs: make(map[common.Hash]struct{}), - } - ss.codeReqs[sha3_nil] = struct{}{} // never request the nil hash - ss.sync = trie.NewTrieSync(root, db, ss.leafFound) - return ss -} + callback := func(leaf []byte, parent common.Hash) error { + var obj struct { + Nonce uint64 + Balance *big.Int + Root common.Hash + CodeHash []byte + } + if err := rlp.Decode(bytes.NewReader(leaf), &obj); err != nil { + return err + } + syncer.AddSubTrie(obj.Root, 64, parent, nil) + syncer.AddRawEntry(common.BytesToHash(obj.CodeHash), 64, parent) -func (self *StateSync) leafFound(leaf []byte, parent common.Hash) error { - var obj struct { - Nonce uint64 - Balance *big.Int - Root common.Hash - CodeHash []byte - } - if err := rlp.Decode(bytes.NewReader(leaf), &obj); err != nil { - return err + return nil } - self.sync.AddSubTrie(obj.Root, 64, parent, nil) + syncer = trie.NewTrieSync(root, database, callback) + return (*StateSync)(syncer) +} - codehash := common.BytesToHash(obj.CodeHash) - if _, ok := self.codeReqs[codehash]; !ok { - code, _ := self.db.Get(obj.CodeHash) - if code == nil { - self.codeReqs[codehash] = struct{}{} - self.codeReqList = append(self.codeReqList, codehash) - } - } - return nil +// Missing retrieves the known missing nodes from the state trie for retrieval. +func (s *StateSync) Missing(max int) []common.Hash { + return (*trie.TrieSync)(s).Missing(max) } -func (self *StateSync) Missing(max int) []common.Hash { - cr := len(self.codeReqList) - gh := 0 - if max != 0 { - if cr > max { - cr = max - } - gh = max - cr - } - list := append(self.sync.Missing(gh), self.codeReqList[:cr]...) - self.codeReqList = self.codeReqList[cr:] - return list +// Process injects a batch of retrieved trie nodes data. +func (s *StateSync) Process(list []trie.SyncResult) (int, error) { + return (*trie.TrieSync)(s).Process(list) } -func (self *StateSync) Process(list []trie.SyncResult) error { - for i := 0; i < len(list); i++ { - if _, ok := self.codeReqs[list[i].Hash]; ok { // code data, not a node - self.db.Put(list[i].Hash[:], list[i].Data) - delete(self.codeReqs, list[i].Hash) - list[i] = list[len(list)-1] - list = list[:len(list)-1] - i-- - } - } - _, err := self.sync.Process(list) - return err +// Pending returns the number of state entries currently pending for download. +func (s *StateSync) Pending() int { + return (*trie.TrieSync)(s).Pending() } diff --git a/core/state/sync_test.go b/core/state/sync_test.go index f6afe8bd8..f0376d484 100644 --- a/core/state/sync_test.go +++ b/core/state/sync_test.go @@ -115,8 +115,8 @@ func testIterativeStateSync(t *testing.T, batch int) { } results[i] = trie.SyncResult{hash, data} } - if err := sched.Process(results); err != nil { - t.Fatalf("failed to process results: %v", err) + if index, err := sched.Process(results); err != nil { + t.Fatalf("failed to process result #%d: %v", index, err) } queue = append(queue[:0], sched.Missing(batch)...) } @@ -145,8 +145,8 @@ func TestIterativeDelayedStateSync(t *testing.T) { } results[i] = trie.SyncResult{hash, data} } - if err := sched.Process(results); err != nil { - t.Fatalf("failed to process results: %v", err) + if index, err := sched.Process(results); err != nil { + t.Fatalf("failed to process result #%d: %v", index, err) } queue = append(queue[len(results):], sched.Missing(0)...) } @@ -183,8 +183,8 @@ func testIterativeRandomStateSync(t *testing.T, batch int) { results = append(results, trie.SyncResult{hash, data}) } // Feed the retrieved results back and queue new tasks - if err := sched.Process(results); err != nil { - t.Fatalf("failed to process results: %v", err) + if index, err := sched.Process(results); err != nil { + t.Fatalf("failed to process result #%d: %v", index, err) } queue = make(map[common.Hash]struct{}) for _, hash := range sched.Missing(batch) { @@ -226,8 +226,8 @@ func TestIterativeRandomDelayedStateSync(t *testing.T) { } } // Feed the retrieved results back and queue new tasks - if err := sched.Process(results); err != nil { - t.Fatalf("failed to process results: %v", err) + if index, err := sched.Process(results); err != nil { + t.Fatalf("failed to process result #%d: %v", index, err) } for _, hash := range sched.Missing(0) { queue[hash] = struct{}{} -- cgit From 5b0ee8ec304663898073b7a4c659e1def23716df Mon Sep 17 00:00:00 2001 From: Péter Szilágyi Date: Tue, 13 Oct 2015 12:04:25 +0300 Subject: core, eth, trie: fix data races and merge/review issues --- core/state/sync.go | 3 +-- core/state/sync_test.go | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'core/state') diff --git a/core/state/sync.go b/core/state/sync.go index 5a388886c..ef2b4b84c 100644 --- a/core/state/sync.go +++ b/core/state/sync.go @@ -26,14 +26,13 @@ import ( "github.com/ethereum/go-ethereum/trie" ) -// StateSync is the main state synchronisation scheduler, which provides yet the +// StateSync is the main state synchronisation scheduler, which provides yet the // unknown state hashes to retrieve, accepts node data associated with said hashes // and reconstructs the state database step by step until all is done. type StateSync trie.TrieSync // NewStateSync create a new state trie download scheduler. func NewStateSync(root common.Hash, database ethdb.Database) *StateSync { - // Pre-declare the result syncer t var syncer *trie.TrieSync callback := func(leaf []byte, parent common.Hash) error { diff --git a/core/state/sync_test.go b/core/state/sync_test.go index f0376d484..0dab372ba 100644 --- a/core/state/sync_test.go +++ b/core/state/sync_test.go @@ -38,7 +38,7 @@ type testAccount struct { func makeTestState() (ethdb.Database, common.Hash, []*testAccount) { // Create an empty state db, _ := ethdb.NewMemDatabase() - state := New(common.Hash{}, db) + state, _ := New(common.Hash{}, db) // Fill it with some arbitrary data accounts := []*testAccount{} @@ -68,7 +68,7 @@ func makeTestState() (ethdb.Database, common.Hash, []*testAccount) { // checkStateAccounts cross references a reconstructed state with an expected // account array. func checkStateAccounts(t *testing.T, db ethdb.Database, root common.Hash, accounts []*testAccount) { - state := New(root, db) + state, _ := New(root, db) for i, acc := range accounts { if balance := state.GetBalance(acc.address); balance.Cmp(acc.balance) != 0 { -- cgit