Merge pull request #1953 from karalabe/switch-to-fast-peers

eth/downloader: fetch data proportionally to peer capacity
author: Jeffrey Wilcke <jeffrey@ethereum.org> 2015-11-20 01:48:53 +0800
committer: Jeffrey Wilcke <jeffrey@ethereum.org> 2015-11-20 01:48:53 +0800
commit: f16fab91c8c2b0d4e33e6ee5792f05522f2b17cb (patch)
tree: 7e9a13377f52658d398f4f3dc11883f515bdbb3d /eth/downloader/peer.go
parent: 4c2933ad825aa11ce118abddfe6eeafc0422b2b6 (diff)
parent: b6f5523bdcded47c4f92b4cb5e6e23287bd6b60d (diff)
download: go-tangerine-f16fab91c8c2b0d4e33e6ee5792f05522f2b17cb.tar.gz
go-tangerine-f16fab91c8c2b0d4e33e6ee5792f05522f2b17cb.tar.zst
go-tangerine-f16fab91c8c2b0d4e33e6ee5792f05522f2b17cb.zip
1 files changed, 125 insertions, 106 deletions
diff --git a/eth/downloader/peer.go b/eth/downloader/peer.go
index 9ba6dabbd..80f08b68f 100644
--- a/eth/downloader/peer.go
+++ b/eth/downloader/peer.go
@@ -30,8 +30,10 @@ import (
 	"github.com/ethereum/go-ethereum/common"
 )
 
-// Maximum number of entries allowed on the list or lacking items.
-const maxLackingHashes = 4096
+const (
+	maxLackingHashes = 4096 // Maximum number of entries allowed on the list of lacking items
+	throughputImpact = 0.1  // The impact a single measurement has on a peer's final throughput value.
+)
 
 // Hash and block fetchers belonging to eth/61 and below
 type relativeHashFetcherFn func(common.Hash) error
@@ -59,18 +61,16 @@ type peer struct {
 	blockIdle   int32 // Current block activity state of the peer (idle = 0, active = 1)
 	receiptIdle int32 // Current receipt activity state of the peer (idle = 0, active = 1)
 	stateIdle   int32 // Current node data activity state of the peer (idle = 0, active = 1)
-	rep         int32 // Simple peer reputation
 
-	blockCapacity   int32 // Number of blocks (bodies) allowed to fetch per request
-	receiptCapacity int32 // Number of receipts allowed to fetch per request
-	stateCapacity   int32 // Number of node data pieces allowed to fetch per request
+	blockThroughput   float64 // Number of blocks (bodies) measured to be retrievable per second
+	receiptThroughput float64 // Number of receipts measured to be retrievable per second
+	stateThroughput   float64 // Number of node data pieces measured to be retrievable per second
 
 	blockStarted   time.Time // Time instance when the last block (body)fetch was started
 	receiptStarted time.Time // Time instance when the last receipt fetch was started
 	stateStarted   time.Time // Time instance when the last node data fetch was started
 
-	lacking     map[common.Hash]struct{} // Set of hashes not to request (didn't have previously)
-	lackingLock sync.RWMutex             // Lock protecting the lacking hashes list
+	lacking map[common.Hash]struct{} // Set of hashes not to request (didn't have previously)
 
 	getRelHashes relativeHashFetcherFn // [eth/61] Method to retrieve a batch of hashes from an origin hash
 	getAbsHashes absoluteHashFetcherFn // [eth/61] Method to retrieve a batch of hashes from an absolute position
@@ -84,6 +84,7 @@ type peer struct {
 	getNodeData stateFetcherFn   // [eth/63] Method to retrieve a batch of state trie data
 
 	version int // Eth protocol version number to switch strategies
+	lock    sync.RWMutex
 }
 
 // newPeer create a new downloader peer, with specific hash and block retrieval
@@ -93,12 +94,9 @@ func newPeer(id string, version int, head common.Hash,
 	getRelHeaders relativeHeaderFetcherFn, getAbsHeaders absoluteHeaderFetcherFn, getBlockBodies blockBodyFetcherFn,
 	getReceipts receiptFetcherFn, getNodeData stateFetcherFn) *peer {
 	return &peer{
-		id:              id,
-		head:            head,
-		blockCapacity:   1,
-		receiptCapacity: 1,
-		stateCapacity:   1,
-		lacking:         make(map[common.Hash]struct{}),
+		id:      id,
+		head:    head,
+		lacking: make(map[common.Hash]struct{}),
 
 		getRelHashes: getRelHashes,
 		getAbsHashes: getAbsHashes,
@@ -117,15 +115,18 @@ func newPeer(id string, version int, head common.Hash,
 
 // Reset clears the internal state of a peer entity.
 func (p *peer) Reset() {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+
 	atomic.StoreInt32(&p.blockIdle, 0)
 	atomic.StoreInt32(&p.receiptIdle, 0)
-	atomic.StoreInt32(&p.blockCapacity, 1)
-	atomic.StoreInt32(&p.receiptCapacity, 1)
-	atomic.StoreInt32(&p.stateCapacity, 1)
+	atomic.StoreInt32(&p.stateIdle, 0)
+
+	p.blockThroughput = 0
+	p.receiptThroughput = 0
+	p.stateThroughput = 0
 
-	p.lackingLock.Lock()
 	p.lacking = make(map[common.Hash]struct{})
-	p.lackingLock.Unlock()
 }
 
 // Fetch61 sends a block retrieval request to the remote peer.
@@ -216,107 +217,86 @@ func (p *peer) FetchNodeData(request *fetchRequest) error {
 	return nil
 }
 
-// SetBlocksIdle sets the peer to idle, allowing it to execute new retrieval requests.
-// Its block retrieval allowance will also be updated either up- or downwards,
-// depending on whether the previous fetch completed in time.
-func (p *peer) SetBlocksIdle() {
-	p.setIdle(p.blockStarted, blockSoftTTL, blockHardTTL, MaxBlockFetch, &p.blockCapacity, &p.blockIdle)
+// SetBlocksIdle sets the peer to idle, allowing it to execute new block retrieval
+// requests. Its estimated block retrieval throughput is updated with that measured
+// just now.
+func (p *peer) SetBlocksIdle(delivered int) {
+	p.setIdle(p.blockStarted, delivered, &p.blockThroughput, &p.blockIdle)
 }
 
-// SetBodiesIdle sets the peer to idle, allowing it to execute new retrieval requests.
-// Its block body retrieval allowance will also be updated either up- or downwards,
-// depending on whether the previous fetch completed in time.
-func (p *peer) SetBodiesIdle() {
-	p.setIdle(p.blockStarted, bodySoftTTL, bodyHardTTL, MaxBodyFetch, &p.blockCapacity, &p.blockIdle)
+// SetBodiesIdle sets the peer to idle, allowing it to execute block body retrieval
+// requests. Its estimated body retrieval throughput is updated with that measured
+// just now.
+func (p *peer) SetBodiesIdle(delivered int) {
+	p.setIdle(p.blockStarted, delivered, &p.blockThroughput, &p.blockIdle)
 }
 
-// SetReceiptsIdle sets the peer to idle, allowing it to execute new retrieval requests.
-// Its receipt retrieval allowance will also be updated either up- or downwards,
-// depending on whether the previous fetch completed in time.
-func (p *peer) SetReceiptsIdle() {
-	p.setIdle(p.receiptStarted, receiptSoftTTL, receiptHardTTL, MaxReceiptFetch, &p.receiptCapacity, &p.receiptIdle)
+// SetReceiptsIdle sets the peer to idle, allowing it to execute new receipt
+// retrieval requests. Its estimated receipt retrieval throughput is updated
+// with that measured just now.
+func (p *peer) SetReceiptsIdle(delivered int) {
+	p.setIdle(p.receiptStarted, delivered, &p.receiptThroughput, &p.receiptIdle)
 }
 
-// SetNodeDataIdle sets the peer to idle, allowing it to execute new retrieval
-// requests. Its node data retrieval allowance will also be updated either up- or
-// downwards, depending on whether the previous fetch completed in time.
-func (p *peer) SetNodeDataIdle() {
-	p.setIdle(p.stateStarted, stateSoftTTL, stateSoftTTL, MaxStateFetch, &p.stateCapacity, &p.stateIdle)
+// SetNodeDataIdle sets the peer to idle, allowing it to execute new state trie
+// data retrieval requests. Its estimated state retrieval throughput is updated
+// with that measured just now.
+func (p *peer) SetNodeDataIdle(delivered int) {
+	p.setIdle(p.stateStarted, delivered, &p.stateThroughput, &p.stateIdle)
 }
 
 // setIdle sets the peer to idle, allowing it to execute new retrieval requests.
-// Its data retrieval allowance will also be updated either up- or downwards,
-// depending on whether the previous fetch completed in time.
-func (p *peer) setIdle(started time.Time, softTTL, hardTTL time.Duration, maxFetch int, capacity, idle *int32) {
-	// Update the peer's download allowance based on previous performance
-	scale := 2.0
-	if time.Since(started) > softTTL {
-		scale = 0.5
-		if time.Since(started) > hardTTL {
-			scale = 1 / float64(maxFetch) // reduces capacity to 1
-		}
-	}
-	for {
-		// Calculate the new download bandwidth allowance
-		prev := atomic.LoadInt32(capacity)
-		next := int32(math.Max(1, math.Min(float64(maxFetch), float64(prev)*scale)))
-
-		// Try to update the old value
-		if atomic.CompareAndSwapInt32(capacity, prev, next) {
-			// If we're having problems at 1 capacity, try to find better peers
-			if next == 1 {
-				p.Demote()
-			}
-			break
-		}
+// Its estimated retrieval throughput is updated with that measured just now.
+func (p *peer) setIdle(started time.Time, delivered int, throughput *float64, idle *int32) {
+	// Irrespective of the throughput update, make sure the peer ends up idle
+	defer atomic.StoreInt32(idle, 0)
+
+	p.lock.Lock() // exclusive: *throughput is written below, while readers hold RLock
+	defer p.lock.Unlock()
+
+	// If nothing was delivered (hard timeout / unavailable data), reduce throughput to minimum
+	if delivered == 0 {
+		*throughput = 0
+		return
 	}
-	// Set the peer to idle to allow further fetch requests
-	atomic.StoreInt32(idle, 0)
+	// Otherwise fold the new measurement into the running (exponentially weighted) estimate
+	measured := float64(delivered) / (float64(time.Since(started)+1) / float64(time.Second)) // +1 (ns) to ensure non-zero divisor
+	*throughput = (1-throughputImpact)*(*throughput) + throughputImpact*measured
 }
 
 // BlockCapacity retrieves the peers block download allowance based on its
-// previously discovered bandwidth capacity.
+// previously discovered throughput.
 func (p *peer) BlockCapacity() int {
-	return int(atomic.LoadInt32(&p.blockCapacity))
-}
+	p.lock.RLock()
+	defer p.lock.RUnlock()
 
-// ReceiptCapacity retrieves the peers block download allowance based on its
-// previously discovered bandwidth capacity.
-func (p *peer) ReceiptCapacity() int {
-	return int(atomic.LoadInt32(&p.receiptCapacity))
+	return int(math.Max(1, math.Min(p.blockThroughput*float64(blockTargetRTT)/float64(time.Second), float64(MaxBlockFetch))))
 }
 
-// NodeDataCapacity retrieves the peers block download allowance based on its
-// previously discovered bandwidth capacity.
-func (p *peer) NodeDataCapacity() int {
-	return int(atomic.LoadInt32(&p.stateCapacity))
-}
+// ReceiptCapacity retrieves the peers receipt download allowance based on its
+// previously discovered throughput.
+func (p *peer) ReceiptCapacity() int {
+	p.lock.RLock()
+	defer p.lock.RUnlock()
 
-// Promote increases the peer's reputation.
-func (p *peer) Promote() {
-	atomic.AddInt32(&p.rep, 1)
+	return int(math.Max(1, math.Min(p.receiptThroughput*float64(receiptTargetRTT)/float64(time.Second), float64(MaxReceiptFetch))))
 }
 
-// Demote decreases the peer's reputation or leaves it at 0.
-func (p *peer) Demote() {
-	for {
-		// Calculate the new reputation value
-		prev := atomic.LoadInt32(&p.rep)
-		next := prev / 2
+// NodeDataCapacity retrieves the peers state download allowance based on its
+// previously discovered throughput.
+func (p *peer) NodeDataCapacity() int {
+	p.lock.RLock()
+	defer p.lock.RUnlock()
 
-		// Try to update the old value
-		if atomic.CompareAndSwapInt32(&p.rep, prev, next) {
-			return
-		}
-	}
+	return int(math.Max(1, math.Min(p.stateThroughput*float64(stateTargetRTT)/float64(time.Second), float64(MaxStateFetch))))
 }
 
 // MarkLacking appends a new entity to the set of items (blocks, receipts, states)
 // that a peer is known not to have (i.e. have been requested before). If the
 // set reaches its maximum allowed capacity, items are randomly dropped off.
 func (p *peer) MarkLacking(hash common.Hash) {
-	p.lackingLock.Lock()
-	defer p.lackingLock.Unlock()
+	p.lock.Lock()
+	defer p.lock.Unlock()
 
 	for len(p.lacking) >= maxLackingHashes {
 		for drop, _ := range p.lacking {
@@ -330,8 +310,8 @@ func (p *peer) MarkLacking(hash common.Hash) {
 // Lacks retrieves whether the hash of a blockchain item is on the peers lacking
 // list (i.e. whether we know that the peer does not have it).
 func (p *peer) Lacks(hash common.Hash) bool {
-	p.lackingLock.RLock()
-	defer p.lackingLock.RUnlock()
+	p.lock.RLock()
+	defer p.lock.RUnlock()
 
 	_, ok := p.lacking[hash]
 	return ok
@@ -339,13 +319,13 @@ func (p *peer) Lacks(hash common.Hash) bool {
 
 // String implements fmt.Stringer.
 func (p *peer) String() string {
-	p.lackingLock.RLock()
-	defer p.lackingLock.RUnlock()
+	p.lock.RLock()
+	defer p.lock.RUnlock()
 
 	return fmt.Sprintf("Peer %s [%s]", p.id,
-		fmt.Sprintf("reputation %3d, ", atomic.LoadInt32(&p.rep))+
-			fmt.Sprintf("block cap %3d, ", atomic.LoadInt32(&p.blockCapacity))+
-			fmt.Sprintf("receipt cap %3d, ", atomic.LoadInt32(&p.receiptCapacity))+
+		fmt.Sprintf("blocks %3.2f/s, ", p.blockThroughput)+
+			fmt.Sprintf("receipts %3.2f/s, ", p.receiptThroughput)+
+			fmt.Sprintf("states %3.2f/s, ", p.stateThroughput)+
 			fmt.Sprintf("lacking %4d", len(p.lacking)),
 	)
 }
@@ -377,6 +357,10 @@ func (ps *peerSet) Reset() {
 
 // Register injects a new peer into the working set, or returns an error if the
 // peer is already known.
+//
+// The method also sets the starting throughput values of the new peer to the
+// average of all existing peers, to give it a realistic chance of being used
+// for data retrievals.
 func (ps *peerSet) Register(p *peer) error {
 	ps.lock.Lock()
 	defer ps.lock.Unlock()
@@ -384,6 +368,20 @@ func (ps *peerSet) Register(p *peer) error {
 	if _, ok := ps.peers[p.id]; ok {
 		return errAlreadyRegistered
 	}
+	if len(ps.peers) > 0 {
+		p.blockThroughput, p.receiptThroughput, p.stateThroughput = 0, 0, 0
+
+		for _, peer := range ps.peers {
+			peer.lock.RLock()
+			p.blockThroughput += peer.blockThroughput
+			p.receiptThroughput += peer.receiptThroughput
+			p.stateThroughput += peer.stateThroughput
+			peer.lock.RUnlock()
+		}
+		p.blockThroughput /= float64(len(ps.peers))
+		p.receiptThroughput /= float64(len(ps.peers))
+		p.stateThroughput /= float64(len(ps.peers))
+	}
 	ps.peers[p.id] = p
 	return nil
 }
@@ -435,7 +433,12 @@ func (ps *peerSet) BlockIdlePeers() ([]*peer, int) {
 	idle := func(p *peer) bool {
 		return atomic.LoadInt32(&p.blockIdle) == 0
 	}
-	return ps.idlePeers(61, 61, idle)
+	throughput := func(p *peer) float64 {
+		p.lock.RLock()
+		defer p.lock.RUnlock()
+		return p.blockThroughput
+	}
+	return ps.idlePeers(61, 61, idle, throughput)
 }
 
 // BodyIdlePeers retrieves a flat list of all the currently body-idle peers within
@@ -444,7 +447,12 @@ func (ps *peerSet) BodyIdlePeers() ([]*peer, int) {
 	idle := func(p *peer) bool {
 		return atomic.LoadInt32(&p.blockIdle) == 0
 	}
-	return ps.idlePeers(62, 64, idle)
+	throughput := func(p *peer) float64 {
+		p.lock.RLock()
+		defer p.lock.RUnlock()
+		return p.blockThroughput
+	}
+	return ps.idlePeers(62, 64, idle, throughput)
 }
 
 // ReceiptIdlePeers retrieves a flat list of all the currently receipt-idle peers
@@ -453,7 +461,12 @@ func (ps *peerSet) ReceiptIdlePeers() ([]*peer, int) {
 	idle := func(p *peer) bool {
 		return atomic.LoadInt32(&p.receiptIdle) == 0
 	}
-	return ps.idlePeers(63, 64, idle)
+	throughput := func(p *peer) float64 {
+		p.lock.RLock()
+		defer p.lock.RUnlock()
+		return p.receiptThroughput
+	}
+	return ps.idlePeers(63, 64, idle, throughput)
 }
 
 // NodeDataIdlePeers retrieves a flat list of all the currently node-data-idle
@@ -462,12 +475,18 @@ func (ps *peerSet) NodeDataIdlePeers() ([]*peer, int) {
 	idle := func(p *peer) bool {
 		return atomic.LoadInt32(&p.stateIdle) == 0
 	}
-	return ps.idlePeers(63, 64, idle)
+	throughput := func(p *peer) float64 {
+		p.lock.RLock()
+		defer p.lock.RUnlock()
+		return p.stateThroughput
+	}
+	return ps.idlePeers(63, 64, idle, throughput)
 }
 
 // idlePeers retrieves a flat list of all currently idle peers satisfying the
 // protocol version constraints, using the provided function to check idleness.
-func (ps *peerSet) idlePeers(minProtocol, maxProtocol int, idleCheck func(*peer) bool) ([]*peer, int) {
+// The resulting set of peers is sorted by their measured throughput.
+func (ps *peerSet) idlePeers(minProtocol, maxProtocol int, idleCheck func(*peer) bool, throughput func(*peer) float64) ([]*peer, int) {
 	ps.lock.RLock()
 	defer ps.lock.RUnlock()
 
@@ -482,7 +501,7 @@ func (ps *peerSet) idlePeers(minProtocol, maxProtocol int, idleCheck func(*peer)
 	}
 	for i := 0; i < len(idle); i++ {
 		for j := i + 1; j < len(idle); j++ {
-			if atomic.LoadInt32(&idle[i].rep) < atomic.LoadInt32(&idle[j].rep) {
+			if throughput(idle[i]) < throughput(idle[j]) {
 				idle[i], idle[j] = idle[j], idle[i]
 			}
 		}
author	Jeffrey Wilcke <jeffrey@ethereum.org>	2015-11-20 01:48:53 +0800
committer	Jeffrey Wilcke <jeffrey@ethereum.org>	2015-11-20 01:48:53 +0800
commit	f16fab91c8c2b0d4e33e6ee5792f05522f2b17cb (patch)
tree	7e9a13377f52658d398f4f3dc11883f515bdbb3d /eth/downloader/peer.go
parent	4c2933ad825aa11ce118abddfe6eeafc0422b2b6 (diff)
parent	b6f5523bdcded47c4f92b4cb5e6e23287bd6b60d (diff)
download	go-tangerine-f16fab91c8c2b0d4e33e6ee5792f05522f2b17cb.tar.gz go-tangerine-f16fab91c8c2b0d4e33e6ee5792f05522f2b17cb.tar.zst go-tangerine-f16fab91c8c2b0d4e33e6ee5792f05522f2b17cb.zip