diff options
author | chriseth <c@ethdev.com> | 2015-05-04 16:15:41 +0800 |
---|---|---|
committer | chriseth <c@ethdev.com> | 2015-05-06 18:53:17 +0800 |
commit | 9d7eb49f35f801b53960135b7c353fa64cea7439 (patch) | |
tree | 07181ef831d3a577a6fdfbfc92f8aff6dc168956 | |
parent | a2e3bcbd0c45a79a9709dc8a69858765ab904805 (diff) | |
download | dexon-solidity-9d7eb49f35f801b53960135b7c353fa64cea7439.tar.gz dexon-solidity-9d7eb49f35f801b53960135b7c353fa64cea7439.tar.zst dexon-solidity-9d7eb49f35f801b53960135b7c353fa64cea7439.zip |
Gather knowledge about the state during control flow analysis.
-rw-r--r-- | Assembly.cpp | 7 | ||||
-rw-r--r-- | ControlFlowGraph.cpp | 91 | ||||
-rw-r--r-- | ControlFlowGraph.h | 22 | ||||
-rw-r--r-- | KnownState.cpp | 75 | ||||
-rw-r--r-- | KnownState.h | 30 | ||||
-rw-r--r-- | SemanticInformation.cpp | 1 | ||||
-rw-r--r-- | SemanticInformation.h | 1 |
7 files changed, 192 insertions, 35 deletions
diff --git a/Assembly.cpp b/Assembly.cpp index c7253622..1c539116 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -314,6 +314,10 @@ Assembly& Assembly::optimise(bool _enable) copt << toString(*this); count = 0; + //@todo CFG interface should be a generator, that returns an item and a pointer to a + // knownstate, which has to replace the current state if it is not null. + // Feed these items to the CSE, but also store them and replace the stored version + // if the items generated by the CSE are shorter. (or even use less gas?) copt << "Performing control flow analysis..."; { ControlFlowGraph cfg(m_items); @@ -329,7 +333,8 @@ Assembly& Assembly::optimise(bool _enable) copt << "Performing common subexpression elimination..."; for (auto iter = m_items.begin(); iter != m_items.end();) { - KnownState state; + //@todo use only a single state / expression classes instance. + KnownState state(make_shared<ExpressionClasses>()); CommonSubexpressionEliminator eliminator(state); auto orig = iter; iter = eliminator.feedItems(iter, m_items.end()); diff --git a/ControlFlowGraph.cpp b/ControlFlowGraph.cpp index cc4367e6..0b0c757d 100644 --- a/ControlFlowGraph.cpp +++ b/ControlFlowGraph.cpp @@ -23,9 +23,11 @@ #include <libevmasm/ControlFlowGraph.h> #include <map> +#include <memory> #include <libevmasm/Exceptions.h> #include <libevmasm/AssemblyItem.h> #include <libevmasm/SemanticInformation.h> +#include <libevmasm/KnownState.h> using namespace std; using namespace dev; @@ -46,6 +48,7 @@ AssemblyItems ControlFlowGraph::optimisedItems() resolveNextLinks(); removeUnusedBlocks(); setPrevLinks(); + gatherKnowledge(); return rebuildCode(); } @@ -209,6 +212,77 @@ void ControlFlowGraph::setPrevLinks() } } +void ControlFlowGraph::gatherKnowledge() +{ + // @todo actually we know that memory is filled with zeros at the beginning, + // we could make use of that. + shared_ptr<KnownState> emptyState = make_shared<KnownState>(); + ExpressionClasses& expr = emptyState->expressionClasses(); + bool unknownJumpEncountered = false; + + vector<pair<BlockId, shared_ptr<KnownState>>> workQueue({make_pair(BlockId::initial(), emptyState->copy())}); + while (!workQueue.empty()) + { + //@todo we might have to do something like incrementing the sequence number for each JUMPDEST + assertThrow(!!workQueue.back().first, OptimizerException, ""); + BasicBlock& block = m_blocks.at(workQueue.back().first); + shared_ptr<KnownState> state = workQueue.back().second; + workQueue.pop_back(); + if (block.startState) + { + state->reduceToCommonKnowledge(*block.startState); + if (*state == *block.startState) + continue; + } + + block.startState = state->copy(); + //@todo we might know the return address for the first pass, but not anymore for the second, + // -> store knowledge about tags as a union. + + // Feed all items except for the final jump yet because it will erase the target tag. + unsigned pc = block.begin; + while (pc < block.end && !SemanticInformation::altersControlFlow(m_items.at(pc))) + state->feedItem(m_items.at(pc++)); + + if ( + block.endType == BasicBlock::EndType::JUMP || + block.endType == BasicBlock::EndType::JUMPI + ) + { + assertThrow(block.begin <= pc && pc == block.end - 1, OptimizerException, ""); + //@todo in the case of JUMPI, add knowledge about the condition to the state + // (for both values of the condition) + BlockId nextBlock = expressionClassToBlockId( + state->stackElement(state->stackHeight(), SourceLocation()), + expr + ); + state->feedItem(m_items.at(pc++)); + if (nextBlock) + workQueue.push_back(make_pair(nextBlock, state->copy())); + else if (!unknownJumpEncountered) + { + // We do not know where this jump goes, so we have to reset the states of all + // JUMPDESTs. + unknownJumpEncountered = true; + for (auto const& it: m_blocks) + if (it.second.begin < it.second.end && m_items[it.second.begin].type() == Tag) + workQueue.push_back(make_pair(it.first, emptyState->copy())); + } + } + else if (block.begin <= pc && pc < block.end) + state->feedItem(m_items.at(pc++)); + assertThrow(block.end <= block.begin || pc == block.end, OptimizerException, ""); + + block.endState = state; + + if ( + block.endType == BasicBlock::EndType::HANDOVER || + block.endType == BasicBlock::EndType::JUMPI + ) + workQueue.push_back(make_pair(block.next, state->copy())); + } +} + AssemblyItems ControlFlowGraph::rebuildCode() { map<BlockId, unsigned> pushes; @@ -233,7 +307,7 @@ AssemblyItems ControlFlowGraph::rebuildCode() blockId = m_blocks.at(blockId).prev; for (; blockId; blockId = m_blocks.at(blockId).next) { - BasicBlock const& block = m_blocks.at(blockId); + BasicBlock& block = m_blocks.at(blockId); blocksToAdd.erase(blockId); blocksAdded.insert(blockId); @@ -243,7 +317,10 @@ AssemblyItems ControlFlowGraph::rebuildCode() continue; // If block starts with unused tag, skip it. if (previousHandedOver && !pushes[blockId] && begin->type() == Tag) + { ++begin; + ++block.begin; + } previousHandedOver = (block.endType == BasicBlock::EndType::HANDOVER); copy(begin, end, back_inserter(code)); } @@ -252,6 +329,18 @@ AssemblyItems ControlFlowGraph::rebuildCode() return code; } +BlockId ControlFlowGraph::expressionClassToBlockId( + ExpressionClasses::Id _id, + ExpressionClasses& _exprClasses +) +{ + ExpressionClasses::Expression expr = _exprClasses.representative(_id); + if (expr.item && expr.item->type() == PushTag) + return BlockId(expr.item->data()); + else + return BlockId::invalid(); +} + BlockId ControlFlowGraph::generateNewId() { BlockId id = BlockId(++m_lastUsedId); diff --git a/ControlFlowGraph.h b/ControlFlowGraph.h index 5d16df32..4310d664 100644 --- a/ControlFlowGraph.h +++ b/ControlFlowGraph.h @@ -24,16 +24,17 @@ #pragma once #include <vector> +#include <memory> #include <libdevcore/Common.h> #include <libdevcore/Assertions.h> +#include <libevmasm/ExpressionClasses.h> namespace dev { namespace eth { -class AssemblyItem; -using AssemblyItems = std::vector<AssemblyItem>; +class KnownState; /** * Identifier for a block, coincides with the tag number of an AssemblyItem but adds a special @@ -69,14 +70,20 @@ struct BasicBlock unsigned end = 0; /// Tags pushed inside this block, with multiplicity. std::vector<BlockId> pushedTags; - /// ID of the block that always follows this one (either JUMP or flow into new block), - /// or BlockId::invalid() otherwise + /// ID of the block that always follows this one (either non-branching part of JUMPI or flow + /// into new block), or BlockId::invalid() otherwise BlockId next = BlockId::invalid(); - /// ID of the block that has to precede this one. + /// ID of the block that has to precede this one (because control flows into it). BlockId prev = BlockId::invalid(); enum class EndType { JUMP, JUMPI, STOP, HANDOVER }; EndType endType = EndType::HANDOVER; + + /// Knowledge about the state when this block is entered. Intersection of all possible ways + /// to enter this block. + std::shared_ptr<KnownState> startState; + /// Knowledge about the state at the end of this block. + std::shared_ptr<KnownState> endState; }; class ControlFlowGraph @@ -93,9 +100,14 @@ private: void splitBlocks(); void resolveNextLinks(); void removeUnusedBlocks(); + void gatherKnowledge(); void setPrevLinks(); AssemblyItems rebuildCode(); + /// @returns the corresponding BlockId if _id is a pushed jump tag, + /// and an invalid BlockId otherwise. + BlockId expressionClassToBlockId(ExpressionClasses::Id _id, ExpressionClasses& _exprClasses); + BlockId generateNewId(); unsigned m_lastUsedId = 0; diff --git a/KnownState.cpp b/KnownState.cpp index 632777c8..7ff0143e 100644 --- a/KnownState.cpp +++ b/KnownState.cpp @@ -30,16 +30,18 @@ using namespace std; using namespace dev; using namespace dev::eth; -ostream& KnownState::stream( - ostream& _out, - map<int, Id> _initialStack, - map<int, Id> _targetStack -) const +ostream& KnownState::stream(ostream& _out) const { auto streamExpressionClass = [this](ostream& _out, Id _id) { auto const& expr = m_expressionClasses->representative(_id); - _out << " " << dec << _id << ": " << *expr.item; + _out << " " << dec << _id << ": "; + if (!expr.item) + _out << " no item"; + else if (expr.item->type() == UndefinedItem) + _out << " unknown " << int(expr.item->data()); + else + _out << *expr.item; if (expr.sequenceNumber) _out << "@" << dec << expr.sequenceNumber; _out << "("; @@ -48,22 +50,32 @@ ostream& KnownState::stream( _out << ")" << endl; }; - _out << "Optimizer analysis:" << endl; - _out << "Final stack height: " << dec << m_stackHeight << endl; + _out << "=== State ===" << endl; + _out << "Stack height: " << dec << m_stackHeight << endl; _out << "Equivalence classes: " << endl; for (Id eqClass = 0; eqClass < m_expressionClasses->size(); ++eqClass) streamExpressionClass(_out, eqClass); - _out << "Initial stack: " << endl; - for (auto const& it: _initialStack) + _out << "Stack: " << endl; + for (auto const& it: m_stackElements) { _out << " " << dec << it.first << ": "; streamExpressionClass(_out, it.second); } - _out << "Target stack: " << endl; - for (auto const& it: _targetStack) + _out << "Storage: " << endl; + for (auto const& it: m_storageContent) { - _out << " " << dec << it.first << ": "; + _out << " "; + streamExpressionClass(_out, it.first); + _out << ": "; + streamExpressionClass(_out, it.second); + } + _out << "Memory: " << endl; + for (auto const& it: m_memoryContent) + { + _out << " "; + streamExpressionClass(_out, it.first); + _out << ": "; streamExpressionClass(_out, it.second); } @@ -73,7 +85,11 @@ ostream& KnownState::stream( KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool _copyItem) { StoreOperation op; - if (_item.type() != Operation) + if (_item.type() == Tag) + { + // can be ignored + } + else if (_item.type() != Operation) { assertThrow(_item.deposit() == 1, InvalidDeposit, ""); setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem)); @@ -127,17 +143,40 @@ KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool resetMemory(); if (SemanticInformation::invalidatesStorage(_item.instruction())) resetStorage(); - setStackElement( - m_stackHeight + _item.deposit(), - m_expressionClasses->find(_item, arguments, _copyItem) - ); + assertThrow(info.ret <= 1, InvalidDeposit, ""); + if (info.ret == 1) + setStackElement( + m_stackHeight + _item.deposit(), + m_expressionClasses->find(_item, arguments, _copyItem) + ); } } + for (int p = m_stackHeight; p > m_stackHeight + _item.deposit(); --p) + m_stackElements.erase(p); m_stackHeight += _item.deposit(); } return op; } +void KnownState::reduceToCommonKnowledge(KnownState const& /*_other*/) +{ + //@todo + *this = KnownState(m_expressionClasses); +} + +bool KnownState::operator==(const KnownState& _other) const +{ + //@todo + return ( + m_stackElements.empty() && + _other.m_stackElements.empty() && + m_storageContent.empty() && + _other.m_storageContent.empty() && + m_memoryContent.empty() && + _other.m_memoryContent.empty() + ); +} + ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location) { if (m_stackElements.count(_stackHeight)) diff --git a/KnownState.h b/KnownState.h index c6dfcee6..f7a3dd67 100644 --- a/KnownState.h +++ b/KnownState.h @@ -27,6 +27,7 @@ #include <map> #include <set> #include <tuple> +#include <memory> #include <ostream> #include <libdevcore/CommonIO.h> #include <libdevcore/Exceptions.h> @@ -70,14 +71,14 @@ public: Id expression; }; - KnownState(): m_expressionClasses(std::make_shared<ExpressionClasses>()) {} + explicit KnownState( + std::shared_ptr<ExpressionClasses> _expressionClasses = std::make_shared<ExpressionClasses>() + ): m_expressionClasses(_expressionClasses) + { + } /// Streams debugging information to @a _out. - std::ostream& stream( - std::ostream& _out, - std::map<int, Id> _initialStack = std::map<int, Id>(), - std::map<int, Id> _targetStack = std::map<int, Id>() - ) const; + std::ostream& stream(std::ostream& _out) const; /// Feeds the item into the system for analysis. /// @returns a possible store operation @@ -92,6 +93,20 @@ public: /// Resets any knowledge. void reset() { resetStorage(); resetMemory(); resetStack(); } + /// Manually increments the storage and memory sequence number. + void incrementSequenceNumber() { m_sequenceNumber += 2; } + + /// Replaces the state by the intersection with _other, i.e. only equal knowledge is retained. + /// If the stack heighht is different, the smaller one is used and the stack is compared + /// relatively. + void reduceToCommonKnowledge(KnownState const& _other); + + /// @returns a shared pointer to a copy of this state. + std::shared_ptr<KnownState> copy() const { return std::make_shared<KnownState>(*this); } + + /// @returns true if the knowledge about the state of both objects is (known to be) equal. + bool operator==(KnownState const& _other) const; + ///@todo the sequence numbers in two copies of this class should never be the same. /// might be doable using two-dimensional sequence numbers, where the first value is incremented /// for each copy @@ -99,8 +114,7 @@ public: /// Retrieves the current equivalence class fo the given stack element (or generates a new /// one if it does not exist yet). Id stackElement(int _stackHeight, SourceLocation const& _location); - /// @returns the equivalence class id of the special initial stack element at the given height - /// (must not be positive). + /// @returns the equivalence class id of the special initial stack element at the given height. Id initialStackElement(int _stackHeight, SourceLocation const& _location); int stackHeight() const { return m_stackHeight; } diff --git a/SemanticInformation.cpp b/SemanticInformation.cpp index 40c36f9e..056162b5 100644 --- a/SemanticInformation.cpp +++ b/SemanticInformation.cpp @@ -128,7 +128,6 @@ bool SemanticInformation::isDeterministic(AssemblyItem const& _item) { if (_item.type() != Operation) return true; - assertThrow(!altersControlFlow(_item), OptimizerException, ""); switch (_item.instruction()) { diff --git a/SemanticInformation.h b/SemanticInformation.h index b14ddb65..094f4591 100644 --- a/SemanticInformation.h +++ b/SemanticInformation.h @@ -48,7 +48,6 @@ struct SemanticInformation static bool altersControlFlow(AssemblyItem const& _item); /// @returns false if the value put on the stack by _item depends on anything else than /// the information in the current block header, memory, storage or stack. - /// @note should not be called for instructions that alter the control flow. static bool isDeterministic(AssemblyItem const& _item); /// @returns true if the given instruction modifies memory. static bool invalidatesMemory(Instruction _instruction); |