diff options
author | obscuren <geffobscura@gmail.com> | 2015-05-03 22:09:10 +0800 |
---|---|---|
committer | obscuren <geffobscura@gmail.com> | 2015-05-03 22:27:03 +0800 |
commit | 1470b22e9051f48fbbeb136cc4c0be0877e9f9a7 (patch) | |
tree | 8b88f38edd9cf61073bee7253a2a8033042b25ae | |
parent | ba2236fa513e06603d3fa2a6d721be3879d7f50e (diff) | |
download | go-tangerine-1470b22e9051f48fbbeb136cc4c0be0877e9f9a7.tar.gz go-tangerine-1470b22e9051f48fbbeb136cc4c0be0877e9f9a7.tar.zst go-tangerine-1470b22e9051f48fbbeb136cc4c0be0877e9f9a7.zip |
downloader: hash downloading recovery
If a peer fails to respond (disconnect, etc.) during hash downloading,
switch to a different peer which has its current_hash in the queue's
peer set.
-rw-r--r-- | eth/downloader/downloader.go | 73 | ||||
-rw-r--r-- | eth/downloader/downloader_test.go | 27 |
2 files changed, 71 insertions, 29 deletions
diff --git a/eth/downloader/downloader.go b/eth/downloader/downloader.go index a484ce0a7..15f4cb0a3 100644 --- a/eth/downloader/downloader.go +++ b/eth/downloader/downloader.go @@ -53,6 +53,11 @@ type syncPack struct { ignoreInitial bool } +type hashPack struct { + peerId string + hashes []common.Hash +} + type Downloader struct { mu sync.RWMutex queue *queue @@ -69,7 +74,7 @@ type Downloader struct { // Channels newPeerCh chan *peer - hashCh chan []common.Hash + hashCh chan hashPack blockCh chan blockPack } @@ -80,7 +85,7 @@ func New(hasBlock hashCheckFn, getBlock getBlockFn) *Downloader { hasBlock: hasBlock, getBlock: getBlock, newPeerCh: make(chan *peer, 1), - hashCh: make(chan []common.Hash, 1), + hashCh: make(chan hashPack, 1), blockCh: make(chan blockPack, 1), } @@ -235,15 +240,15 @@ func (d *Downloader) getFromPeer(p *peer, hash common.Hash, ignoreInitial bool) } // XXX Make synchronous -func (d *Downloader) startFetchingHashes(p *peer, hash common.Hash, ignoreInitial bool) error { +func (d *Downloader) startFetchingHashes(p *peer, h common.Hash, ignoreInitial bool) error { atomic.StoreInt32(&d.fetchingHashes, 1) defer atomic.StoreInt32(&d.fetchingHashes, 0) - if d.queue.has(hash) { + if d.queue.has(h) { return errAlreadyInPool } - glog.V(logger.Debug).Infof("Downloading hashes (%x) from %s", hash.Bytes()[:4], p.id) + glog.V(logger.Debug).Infof("Downloading hashes (%x) from %s", h[:4], p.id) start := time.Now() @@ -251,22 +256,34 @@ func (d *Downloader) startFetchingHashes(p *peer, hash common.Hash, ignoreInitia // In such circumstances we don't need to download the block so don't add it to the queue. 
if !ignoreInitial { // Add the hash to the queue first - d.queue.hashPool.Add(hash) + d.queue.hashPool.Add(h) } // Get the first batch of hashes - p.getHashes(hash) + p.getHashes(h) - failureResponseTimer := time.NewTimer(hashTtl) + var ( + failureResponseTimer = time.NewTimer(hashTtl) + attemptedPeers = make(map[string]bool) // attempted peers will help with retries + activePeer = p // active peer will help determine the current active peer + hash common.Hash // common and last hash + ) + attemptedPeers[p.id] = true out: for { select { - case hashes := <-d.hashCh: + case hashPack := <-d.hashCh: + // make sure the active peer is giving us the hashes + if hashPack.peerId != activePeer.id { + glog.V(logger.Debug).Infof("Received hashes from incorrect peer(%s)\n", hashPack.peerId) + break + } + failureResponseTimer.Reset(hashTtl) var ( - done bool // determines whether we're done fetching hashes (i.e. common hash found) - hash common.Hash // current and common hash + hashes = hashPack.hashes + done bool // determines whether we're done fetching hashes (i.e. common hash found) ) hashSet := set.New() for _, hash = range hashes { @@ -283,13 +300,13 @@ out: // Add hashes to the chunk set if len(hashes) == 0 { // Make sure the peer actually gave you something valid - glog.V(logger.Debug).Infof("Peer (%s) responded with empty hash set\n", p.id) + glog.V(logger.Debug).Infof("Peer (%s) responded with empty hash set\n", activePeer.id) d.queue.reset() return errEmptyHashSet } else if !done { // Check if we're done fetching // Get the next set of hashes - p.getHashes(hashes[len(hashes)-1]) + activePeer.getHashes(hash) } else { // we're done // The offset of the queue is determined by the highest known block var offset int @@ -303,12 +320,30 @@ out: } case <-failureResponseTimer.C: glog.V(logger.Debug).Infof("Peer (%s) didn't respond in time for hash request\n", p.id) - // TODO instead of reseting the queue select a new peer from which we can start downloading hashes. - // 1. 
check for peer's best hash to be included in the current hash set; - // 2. resume from last point (hashes[len(hashes)-1]) using the newly selected peer. - d.queue.reset() - return errTimeout + var p *peer // p will be set if a peer can be found + // Attempt to find a new peer by checking inclusion of peers best hash in our + // already fetched hash list. This can't guarantee 100% correctness but does + // a fair job. This is always either correct or false incorrect. + for id, peer := range d.peers { + if d.queue.hashPool.Has(peer.recentHash) && !attemptedPeers[id] { + p = peer + break + } + } + + // if all peers have been tried, abort the process entirely or if the hash is + // the zero hash. + if p == nil || (hash == common.Hash{}) { + d.queue.reset() + return errTimeout + } + + // set p to the active peer. this will invalidate any hashes that may be returned + // by our previous (delayed) peer. + activePeer = p + p.getHashes(hash) + glog.V(logger.Debug).Infof("Hash fetching switched to new peer(%s)\n", p.id) } } glog.V(logger.Detail).Infof("Downloaded hashes (%d) in %v\n", d.queue.hashPool.Size(), time.Since(start)) @@ -454,7 +489,7 @@ func (d *Downloader) AddHashes(id string, hashes []common.Hash) error { glog.Infof("adding %d (T=%d) hashes [ %x / %x ] from: %s\n", len(hashes), d.queue.hashPool.Size(), from[:4], to[:4], id) } - d.hashCh <- hashes + d.hashCh <- hashPack{id, hashes} return nil } diff --git a/eth/downloader/downloader_test.go b/eth/downloader/downloader_test.go index fe68ea914..872ea02eb 100644 --- a/eth/downloader/downloader_test.go +++ b/eth/downloader/downloader_test.go @@ -42,12 +42,13 @@ func createBlocksFromHashes(hashes []common.Hash) map[common.Hash]*types.Block { } type downloadTester struct { - downloader *Downloader - hashes []common.Hash - blocks map[common.Hash]*types.Block - t *testing.T - pcount int - done chan bool + downloader *Downloader + hashes []common.Hash + blocks map[common.Hash]*types.Block + t *testing.T + pcount int + 
done chan bool + activePeerId string } func newTester(t *testing.T, hashes []common.Hash, blocks map[common.Hash]*types.Block) *downloadTester { @@ -58,6 +59,11 @@ func newTester(t *testing.T, hashes []common.Hash, blocks map[common.Hash]*types return tester } +func (dl *downloadTester) sync(peerId string, hash common.Hash) error { + dl.activePeerId = peerId + return dl.downloader.Synchronise(peerId, hash) +} + func (dl *downloadTester) hasBlock(hash common.Hash) bool { if knownHash == hash { return true @@ -70,7 +76,7 @@ func (dl *downloadTester) getBlock(hash common.Hash) *types.Block { } func (dl *downloadTester) getHashes(hash common.Hash) error { - dl.downloader.hashCh <- dl.hashes + dl.downloader.AddHashes(dl.activePeerId, dl.hashes) return nil } @@ -115,8 +121,9 @@ func TestDownload(t *testing.T) { tester.newPeer("peer2", big.NewInt(0), common.Hash{}) tester.badBlocksPeer("peer3", big.NewInt(0), common.Hash{}) tester.badBlocksPeer("peer4", big.NewInt(0), common.Hash{}) + tester.activePeerId = "peer1" - err := tester.downloader.Synchronise("peer1", hashes[0]) + err := tester.sync("peer1", hashes[0]) if err != nil { t.Error("download error", err) } @@ -139,7 +146,7 @@ func TestMissing(t *testing.T) { hashes = append(extraHashes, hashes[:len(hashes)-1]...) tester.newPeer("peer2", big.NewInt(0), common.Hash{}) - err := tester.downloader.Synchronise("peer1", hashes[0]) + err := tester.sync("peer1", hashes[0]) if err != nil { t.Error("download error", err) } @@ -164,7 +171,7 @@ func TestTaking(t *testing.T) { tester.badBlocksPeer("peer3", big.NewInt(0), common.Hash{}) tester.badBlocksPeer("peer4", big.NewInt(0), common.Hash{}) - err := tester.downloader.Synchronise("peer1", hashes[0]) + err := tester.sync("peer1", hashes[0]) if err != nil { t.Error("download error", err) } |