author | Felix Lange <fjl@twurst.com> | 2015-04-27 06:50:18 +0800
---|---|---
committer | Felix Lange <fjl@twurst.com> | 2015-05-06 22:10:41 +0800
commit | 2adcc31bb48af0dee979f2b4ab255d9af21fd097 (patch) |
tree | e13845f15c96a87ac0fc9345f3a0ee90cfd006da /p2p/discover/table.go |
parent | d457a1187dbbbf08bcce437789732dab02a73b0f (diff) |
p2p/discover: new distance metric based on sha3(id)
The previous metric was pubkey1 ^ pubkey2, as specified in the Kademlia
paper. We missed that EC public keys are not uniformly distributed, so
XOR distances over the raw keys are skewed. Using the sha3 hash of the
public keys instead addresses that. It also makes it a bit harder to
deliberately generate node IDs that are close to a particular node.
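As a minimal sketch of the new metric (simplified stand-ins, not the go-ethereum code: NodeID, Hash, hashID and logdist below approximate the package's NodeID, common.Hash, crypto.Sha3Hash and logdist), distance is taken between Keccak-256 hashes of the 512-bit IDs, and the bit length of the XOR of two hashes gives the bucket index:

```go
package main

import (
	"fmt"
	"math/bits"

	"golang.org/x/crypto/sha3"
)

// Simplified stand-ins for the real types (assumptions, not the
// go-ethereum definitions): a 512-bit node ID and a 256-bit hash.
type NodeID [64]byte
type Hash [32]byte

// hashID mirrors the idea of crypto.Sha3Hash(id[:]): hash the ID
// before any distance comparison so the keyspace is uniform.
// Ethereum's "sha3" is legacy Keccak-256, not standard SHA3-256.
func hashID(id NodeID) (h Hash) {
	d := sha3.NewLegacyKeccak256()
	d.Write(id[:])
	copy(h[:], d.Sum(nil))
	return h
}

// logdist returns the bucket index: the bit length of a XOR b,
// i.e. the hash size in bits minus the length of the common bit
// prefix. 0 means a == b.
func logdist(a, b Hash) int {
	lz := 0
	for i := range a {
		if x := a[i] ^ b[i]; x == 0 {
			lz += 8
		} else {
			lz += bits.LeadingZeros8(x)
			break
		}
	}
	return len(a)*8 - lz
}

func main() {
	var a, b NodeID
	b[0] = 0x01 // IDs differing in a single bit land in unrelated buckets
	fmt.Println(logdist(hashID(a), hashID(b)))
}
```

Because the bucket index is derived from the hash, placing an ID near a victim now requires searching for a key whose hash has the right prefix instead of constructing the ID directly.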
Diffstat (limited to 'p2p/discover/table.go')
-rw-r--r-- | p2p/discover/table.go | 54
1 file changed, 30 insertions(+), 24 deletions(-)
```diff
diff --git a/p2p/discover/table.go b/p2p/discover/table.go
index ae10fed5b..2c9cb80d5 100644
--- a/p2p/discover/table.go
+++ b/p2p/discover/table.go
@@ -7,20 +7,24 @@
 package discover
 
 import (
+	"crypto/rand"
 	"net"
 	"sort"
 	"sync"
 	"time"
 
+	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/crypto"
 	"github.com/ethereum/go-ethereum/logger"
 	"github.com/ethereum/go-ethereum/logger/glog"
 )
 
 const (
-	alpha      = 3              // Kademlia concurrency factor
-	bucketSize = 16             // Kademlia bucket size
-	nBuckets   = nodeIDBits + 1 // Number of buckets
+	alpha      = 3            // Kademlia concurrency factor
+	bucketSize = 16           // Kademlia bucket size
+	hashBits   = len(common.Hash{}) * 8
+	nBuckets   = hashBits + 1 // Number of buckets
+
 	maxBondingPingPongs = 10
 )
@@ -116,21 +120,23 @@ func (tab *Table) Bootstrap(nodes []*Node) {
 // Lookup performs a network search for nodes close
 // to the given target. It approaches the target by querying
 // nodes that are closer to it on each iteration.
-func (tab *Table) Lookup(target NodeID) []*Node {
+// The given target does not need to be an actual node
+// identifier.
+func (tab *Table) Lookup(targetID NodeID) []*Node {
 	var (
+		target         = crypto.Sha3Hash(targetID[:])
 		asked          = make(map[NodeID]bool)
 		seen           = make(map[NodeID]bool)
 		reply          = make(chan []*Node, alpha)
 		pendingQueries = 0
 	)
-	// don't query further if we hit the target or ourself.
+	// don't query further if we hit ourself.
 	// unlikely to happen often in practice.
-	asked[target] = true
 	asked[tab.self.ID] = true
 
 	tab.mutex.Lock()
 	// update last lookup stamp (for refresh logic)
-	tab.buckets[logdist(tab.self.ID, target)].lastLookup = time.Now()
+	tab.buckets[logdist(tab.self.sha, target)].lastLookup = time.Now()
 	// generate initial result set
 	result := tab.closest(target, bucketSize)
 	tab.mutex.Unlock()
@@ -143,7 +149,7 @@ func (tab *Table) Lookup(target NodeID) []*Node {
 			asked[n.ID] = true
 			pendingQueries++
 			go func() {
-				r, _ := tab.net.findnode(n.ID, n.addr(), target)
+				r, _ := tab.net.findnode(n.ID, n.addr(), targetID)
 				reply <- tab.bondall(r)
 			}()
 		}
@@ -166,17 +172,16 @@ func (tab *Table) Lookup(target NodeID) []*Node {
 
 // refresh performs a lookup for a random target to keep buckets full.
 func (tab *Table) refresh() {
-	ld := -1 // logdist of chosen bucket
-	tab.mutex.Lock()
-	for i, b := range tab.buckets {
-		if i > 0 && b.lastLookup.Before(time.Now().Add(-1*time.Hour)) {
-			ld = i
-			break
-		}
-	}
-	tab.mutex.Unlock()
-
-	result := tab.Lookup(randomID(tab.self.ID, ld))
+	// The Kademlia paper specifies that the bucket refresh should
+	// perform a refresh in the least recently used bucket. We cannot
+	// adhere to this because the findnode target is a 512bit value
+	// (not hash-sized) and it is not easily possible to generate a
+	// sha3 preimage that falls into a chosen bucket.
+	//
+	// We perform a lookup with a random target instead.
+	var target NodeID
+	rand.Read(target[:])
+	result := tab.Lookup(target)
 	if len(result) == 0 {
 		// Pick a batch of previously know seeds to lookup with
 		seeds := tab.db.querySeeds(10)
@@ -196,7 +201,7 @@ func (tab *Table) refresh() {
 
 // closest returns the n nodes in the table that are closest to the
 // given id. The caller must hold tab.mutex.
-func (tab *Table) closest(target NodeID, nresults int) *nodesByDistance {
+func (tab *Table) closest(target common.Hash, nresults int) *nodesByDistance {
 	// This is a very wasteful way to find the closest nodes but
 	// obviously correct. I believe that tree-based buckets would make
 	// this easier to implement efficiently.
```
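The refresh comment above notes that it is not easily possible to generate a sha3 preimage falling into a chosen bucket. A rough illustration of the cost, extending the sketch after the commit message (same hypothetical hashID, logdist, NodeID and Hash helpers, plus crypto/rand): landing in bucket d of a given node means matching a prefix of roughly hashBits-d bits of its hash, so on the order of 2^(hashBits-d) random attempts.

```go
// mineTarget brute-forces a NodeID whose Keccak-256 hash falls into
// bucket d relative to self. Expected attempts grow as 2^(hashBits-d),
// so this is only practical for the largest d, which is why refresh()
// settles for a uniformly random target. Illustrative only; not part
// of the go-ethereum API.
func mineTarget(self Hash, d int) (NodeID, int) {
	tries := 0
	for {
		var id NodeID
		if _, err := rand.Read(id[:]); err != nil {
			panic(err) // crypto/rand is not expected to fail
		}
		tries++
		if logdist(self, hashID(id)) == d {
			return id, tries
		}
	}
}
```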
```diff
@@ -278,7 +283,8 @@ func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16
 	}
 	tab.mutex.Lock()
 	defer tab.mutex.Unlock()
-	if b := tab.buckets[logdist(tab.self.ID, n.ID)]; !b.bump(n) {
+	b := tab.buckets[logdist(tab.self.sha, n.sha)]
+	if !b.bump(n) {
 		tab.pingreplace(n, b)
 	}
 	return n, nil
@@ -346,7 +352,7 @@ outer:
 			// don't add self.
 			continue
 		}
-		bucket := tab.buckets[logdist(tab.self.ID, n.ID)]
+		bucket := tab.buckets[logdist(tab.self.sha, n.sha)]
 		for i := range bucket.entries {
 			if bucket.entries[i].ID == n.ID {
 				// already in bucket
@@ -375,13 +381,13 @@ func (b *bucket) bump(n *Node) bool {
 // distance to target.
 type nodesByDistance struct {
 	entries []*Node
-	target  NodeID
+	target  common.Hash
 }
 
 // push adds the given node to the list, keeping the total size below maxElems.
 func (h *nodesByDistance) push(n *Node, maxElems int) {
 	ix := sort.Search(len(h.entries), func(i int) bool {
-		return distcmp(h.target, h.entries[i].ID, n.ID) > 0
+		return distcmp(h.target, h.entries[i].sha, n.sha) > 0
 	})
 	if len(h.entries) < maxElems {
 		h.entries = append(h.entries, n)
```
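For completeness, a self-contained sketch of the ordering that closest and push rely on. distcmp does not appear in this diff; the version below is an assumed implementation matching its call sites (it compares XOR distances to the target as big-endian integers), and the types are the same simplified stand-ins used in the earlier sketches:

```go
package main

import (
	"fmt"
	"sort"
)

// Simplified stand-ins (assumptions, not the real definitions).
type Hash [32]byte

type Node struct{ sha Hash } // sha: Keccak-256 hash of the node's 512-bit ID

// nodesByDistance keeps entries sorted by XOR distance to target.
type nodesByDistance struct {
	entries []*Node
	target  Hash
}

// distcmp compares a and b by XOR distance to target: -1 if a is
// closer, 1 if b is closer, 0 if equidistant. Comparing the XOR
// bytewise from the top is the same as comparing it as a
// big-endian integer.
func distcmp(target, a, b Hash) int {
	for i := range target {
		da, db := a[i]^target[i], b[i]^target[i]
		switch {
		case da < db:
			return -1
		case da > db:
			return 1
		}
	}
	return 0
}

// push inserts n in distance order, keeping at most maxElems entries;
// the farthest node is dropped when the list is full.
func (h *nodesByDistance) push(n *Node, maxElems int) {
	ix := sort.Search(len(h.entries), func(i int) bool {
		return distcmp(h.target, h.entries[i].sha, n.sha) > 0
	})
	if len(h.entries) < maxElems {
		h.entries = append(h.entries, n)
	}
	if ix < len(h.entries) {
		// slide farther entries down one slot and insert n; this
		// overwrites the slot just appended (or drops the last entry).
		copy(h.entries[ix+1:], h.entries[ix:])
		h.entries[ix] = n
	}
	// ix == len(h.entries): n is farther than everything kept, discard.
}

func main() {
	list := &nodesByDistance{} // target is the zero hash
	for b := byte(4); b >= 1; b-- {
		list.push(&Node{sha: Hash{0: b}}, 3)
	}
	for _, n := range list.entries {
		fmt.Printf("%02x\n", n.sha[0]) // prints 01, 02, 03: the closest three
	}
}
```

Keeping the list sorted with sort.Search plus a bounded append costs O(maxElems) per push, which is cheap here since maxElems is bucketSize (16).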