diff options
author | Felix Lange <fjl@users.noreply.github.com> | 2018-10-12 02:32:14 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-10-12 02:32:14 +0800 |
commit | dcae0d348bb7f5d9052e50a83383a33538ce376a (patch) | |
tree | 9dee15f209bd31c3adf32fef3835a06bda8f18e4 /p2p/simulations/network.go | |
parent | f951e23fb5ad2f7017f314a95287bc0506a67d05 (diff) | |
download | dexon-dcae0d348bb7f5d9052e50a83383a33538ce376a.tar.gz dexon-dcae0d348bb7f5d9052e50a83383a33538ce376a.tar.zst dexon-dcae0d348bb7f5d9052e50a83383a33538ce376a.zip |
p2p/simulations: fix a deadlock and clean up adapters (#17891)
This fixes a rare deadlock with the inproc adapter:
- A node is stopped, which acquires Network.lock.
- The protocol code being simulated (swarm/network in my case)
waits for its goroutines to shut down.
- One of those goroutines calls into the simulation to add a peer,
which waits for Network.lock.
The fix for the deadlock is really simple, just release the lock
before stopping the simulation node.
Other changes in this PR clean up the exec adapter so it reports
node startup errors better and remove the docker adapter because
it just adds overhead.
In the exec adapter, node information is now posted to a one-shot
server. This avoids log parsing and allows reporting startup
errors to the simulation host.
A small change in package node was needed because simulation
nodes use port zero. Node.{HTTP,WS}Endpoint now return the live
endpoints after startup by checking the TCP listener.
Diffstat (limited to 'p2p/simulations/network.go')
-rw-r--r-- | p2p/simulations/network.go | 44 |
1 files changed, 24 insertions, 20 deletions
diff --git a/p2p/simulations/network.go b/p2p/simulations/network.go index 101ac09f8..200015ff3 100644 --- a/p2p/simulations/network.go +++ b/p2p/simulations/network.go @@ -116,7 +116,7 @@ func (net *Network) NewNodeWithConfig(conf *adapters.NodeConfig) (*Node, error) Node: adapterNode, Config: conf, } - log.Trace(fmt.Sprintf("node %v created", conf.ID)) + log.Trace("Node created", "id", conf.ID) net.nodeMap[conf.ID] = len(net.Nodes) net.Nodes = append(net.Nodes, node) @@ -167,6 +167,7 @@ func (net *Network) Start(id enode.ID) error { func (net *Network) startWithSnapshots(id enode.ID, snapshots map[string][]byte) error { net.lock.Lock() defer net.lock.Unlock() + node := net.getNode(id) if node == nil { return fmt.Errorf("node %v does not exist", id) @@ -174,13 +175,13 @@ func (net *Network) startWithSnapshots(id enode.ID, snapshots map[string][]byte) if node.Up { return fmt.Errorf("node %v already up", id) } - log.Trace(fmt.Sprintf("starting node %v: %v using %v", id, node.Up, net.nodeAdapter.Name())) + log.Trace("Starting node", "id", id, "adapter", net.nodeAdapter.Name()) if err := node.Start(snapshots); err != nil { - log.Warn(fmt.Sprintf("start up failed: %v", err)) + log.Warn("Node startup failed", "id", id, "err", err) return err } node.Up = true - log.Info(fmt.Sprintf("started node %v: %v", id, node.Up)) + log.Info("Started node", "id", id) net.events.Send(NewEvent(node)) @@ -209,7 +210,6 @@ func (net *Network) watchPeerEvents(id enode.ID, events chan *p2p.PeerEvent, sub defer net.lock.Unlock() node := net.getNode(id) if node == nil { - log.Error("Can not find node for id", "id", id) return } node.Up = false @@ -240,7 +240,7 @@ func (net *Network) watchPeerEvents(id enode.ID, events chan *p2p.PeerEvent, sub case err := <-sub.Err(): if err != nil { - log.Error(fmt.Sprintf("error getting peer events for node %v", id), "err", err) + log.Error("Error in peer event subscription", "id", id, "err", err) } return } @@ -250,7 +250,6 @@ func (net *Network) watchPeerEvents(id enode.ID, events chan *p2p.PeerEvent, sub // Stop stops the node with the given ID func (net *Network) Stop(id enode.ID) error { net.lock.Lock() - defer net.lock.Unlock() node := net.getNode(id) if node == nil { return fmt.Errorf("node %v does not exist", id) @@ -258,12 +257,17 @@ func (net *Network) Stop(id enode.ID) error { if !node.Up { return fmt.Errorf("node %v already down", id) } - if err := node.Stop(); err != nil { - return err - } node.Up = false - log.Info(fmt.Sprintf("stop node %v: %v", id, node.Up)) + net.lock.Unlock() + err := node.Stop() + if err != nil { + net.lock.Lock() + node.Up = true + net.lock.Unlock() + return err + } + log.Info("Stopped node", "id", id, "err", err) net.events.Send(ControlEvent(node)) return nil } @@ -271,7 +275,7 @@ func (net *Network) Stop(id enode.ID) error { // Connect connects two nodes together by calling the "admin_addPeer" RPC // method on the "one" node so that it connects to the "other" node func (net *Network) Connect(oneID, otherID enode.ID) error { - log.Debug(fmt.Sprintf("connecting %s to %s", oneID, otherID)) + log.Debug("Connecting nodes with addPeer", "id", oneID, "other", otherID) conn, err := net.InitConn(oneID, otherID) if err != nil { return err @@ -481,10 +485,10 @@ func (net *Network) InitConn(oneID, otherID enode.ID) (*Conn, error) { err = conn.nodesUp() if err != nil { - log.Trace(fmt.Sprintf("nodes not up: %v", err)) + log.Trace("Nodes not up", "err", err) return nil, fmt.Errorf("nodes not up: %v", err) } - log.Debug("InitConn - connection initiated") + log.Debug("Connection initiated", "id", oneID, "other", otherID) conn.initiated = time.Now() return conn, nil } @@ -492,9 +496,9 @@ func (net *Network) InitConn(oneID, otherID enode.ID) (*Conn, error) { // Shutdown stops all nodes in the network and closes the quit channel func (net *Network) Shutdown() { for _, node := range net.Nodes { - log.Debug(fmt.Sprintf("stopping node %s", node.ID().TerminalString())) + log.Debug("Stopping node", "id", node.ID()) if err := node.Stop(); err != nil { - log.Warn(fmt.Sprintf("error stopping node %s", node.ID().TerminalString()), "err", err) + log.Warn("Can't stop node", "id", node.ID(), "err", err) } } close(net.quitc) @@ -708,18 +712,18 @@ func (net *Network) Subscribe(events chan *Event) { } func (net *Network) executeControlEvent(event *Event) { - log.Trace("execute control event", "type", event.Type, "event", event) + log.Trace("Executing control event", "type", event.Type, "event", event) switch event.Type { case EventTypeNode: if err := net.executeNodeEvent(event); err != nil { - log.Error("error executing node event", "event", event, "err", err) + log.Error("Error executing node event", "event", event, "err", err) } case EventTypeConn: if err := net.executeConnEvent(event); err != nil { - log.Error("error executing conn event", "event", event, "err", err) + log.Error("Error executing conn event", "event", event, "err", err) } case EventTypeMsg: - log.Warn("ignoring control msg event") + log.Warn("Ignoring control msg event") } } |