aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Assembly.cpp7
-rw-r--r--ControlFlowGraph.cpp91
-rw-r--r--ControlFlowGraph.h22
-rw-r--r--KnownState.cpp75
-rw-r--r--KnownState.h30
-rw-r--r--SemanticInformation.cpp1
-rw-r--r--SemanticInformation.h1
7 files changed, 192 insertions, 35 deletions
diff --git a/Assembly.cpp b/Assembly.cpp
index c7253622..1c539116 100644
--- a/Assembly.cpp
+++ b/Assembly.cpp
@@ -314,6 +314,10 @@ Assembly& Assembly::optimise(bool _enable)
copt << toString(*this);
count = 0;
+ //@todo CFG interface should be a generator, that returns an item and a pointer to a
+ // knownstate, which has to replace the current state if it is not null.
+ // Feed these items to the CSE, but also store them and replace the stored version
+ // if the items generated by the CSE are shorter. (or even use less gas?)
copt << "Performing control flow analysis...";
{
ControlFlowGraph cfg(m_items);
@@ -329,7 +333,8 @@ Assembly& Assembly::optimise(bool _enable)
copt << "Performing common subexpression elimination...";
for (auto iter = m_items.begin(); iter != m_items.end();)
{
- KnownState state;
+ //@todo use only a single state / expression classes instance.
+ KnownState state(make_shared<ExpressionClasses>());
CommonSubexpressionEliminator eliminator(state);
auto orig = iter;
iter = eliminator.feedItems(iter, m_items.end());
diff --git a/ControlFlowGraph.cpp b/ControlFlowGraph.cpp
index cc4367e6..0b0c757d 100644
--- a/ControlFlowGraph.cpp
+++ b/ControlFlowGraph.cpp
@@ -23,9 +23,11 @@
#include <libevmasm/ControlFlowGraph.h>
#include <map>
+#include <memory>
#include <libevmasm/Exceptions.h>
#include <libevmasm/AssemblyItem.h>
#include <libevmasm/SemanticInformation.h>
+#include <libevmasm/KnownState.h>
using namespace std;
using namespace dev;
@@ -46,6 +48,7 @@ AssemblyItems ControlFlowGraph::optimisedItems()
resolveNextLinks();
removeUnusedBlocks();
setPrevLinks();
+ gatherKnowledge();
return rebuildCode();
}
@@ -209,6 +212,77 @@ void ControlFlowGraph::setPrevLinks()
}
}
+void ControlFlowGraph::gatherKnowledge()
+{
+ // @todo actually we know that memory is filled with zeros at the beginning,
+ // we could make use of that.
+ shared_ptr<KnownState> emptyState = make_shared<KnownState>();
+ ExpressionClasses& expr = emptyState->expressionClasses();
+ bool unknownJumpEncountered = false;
+
+ vector<pair<BlockId, shared_ptr<KnownState>>> workQueue({make_pair(BlockId::initial(), emptyState->copy())});
+ while (!workQueue.empty())
+ {
+ //@todo we might have to do something like incrementing the sequence number for each JUMPDEST
+ assertThrow(!!workQueue.back().first, OptimizerException, "");
+ BasicBlock& block = m_blocks.at(workQueue.back().first);
+ shared_ptr<KnownState> state = workQueue.back().second;
+ workQueue.pop_back();
+ if (block.startState)
+ {
+ state->reduceToCommonKnowledge(*block.startState);
+ if (*state == *block.startState)
+ continue;
+ }
+
+ block.startState = state->copy();
+ //@todo we might know the return address for the first pass, but not anymore for the second,
+ // -> store knowledge about tags as a union.
+
+ // Feed all items except for the final jump yet because it will erase the target tag.
+ unsigned pc = block.begin;
+ while (pc < block.end && !SemanticInformation::altersControlFlow(m_items.at(pc)))
+ state->feedItem(m_items.at(pc++));
+
+ if (
+ block.endType == BasicBlock::EndType::JUMP ||
+ block.endType == BasicBlock::EndType::JUMPI
+ )
+ {
+ assertThrow(block.begin <= pc && pc == block.end - 1, OptimizerException, "");
+ //@todo in the case of JUMPI, add knowledge about the condition to the state
+ // (for both values of the condition)
+ BlockId nextBlock = expressionClassToBlockId(
+ state->stackElement(state->stackHeight(), SourceLocation()),
+ expr
+ );
+ state->feedItem(m_items.at(pc++));
+ if (nextBlock)
+ workQueue.push_back(make_pair(nextBlock, state->copy()));
+ else if (!unknownJumpEncountered)
+ {
+ // We do not know where this jump goes, so we have to reset the states of all
+ // JUMPDESTs.
+ unknownJumpEncountered = true;
+ for (auto const& it: m_blocks)
+ if (it.second.begin < it.second.end && m_items[it.second.begin].type() == Tag)
+ workQueue.push_back(make_pair(it.first, emptyState->copy()));
+ }
+ }
+ else if (block.begin <= pc && pc < block.end)
+ state->feedItem(m_items.at(pc++));
+ assertThrow(block.end <= block.begin || pc == block.end, OptimizerException, "");
+
+ block.endState = state;
+
+ if (
+ block.endType == BasicBlock::EndType::HANDOVER ||
+ block.endType == BasicBlock::EndType::JUMPI
+ )
+ workQueue.push_back(make_pair(block.next, state->copy()));
+ }
+}
+
AssemblyItems ControlFlowGraph::rebuildCode()
{
map<BlockId, unsigned> pushes;
@@ -233,7 +307,7 @@ AssemblyItems ControlFlowGraph::rebuildCode()
blockId = m_blocks.at(blockId).prev;
for (; blockId; blockId = m_blocks.at(blockId).next)
{
- BasicBlock const& block = m_blocks.at(blockId);
+ BasicBlock& block = m_blocks.at(blockId);
blocksToAdd.erase(blockId);
blocksAdded.insert(blockId);
@@ -243,7 +317,10 @@ AssemblyItems ControlFlowGraph::rebuildCode()
continue;
// If block starts with unused tag, skip it.
if (previousHandedOver && !pushes[blockId] && begin->type() == Tag)
+ {
++begin;
+ ++block.begin;
+ }
previousHandedOver = (block.endType == BasicBlock::EndType::HANDOVER);
copy(begin, end, back_inserter(code));
}
@@ -252,6 +329,18 @@ AssemblyItems ControlFlowGraph::rebuildCode()
return code;
}
+BlockId ControlFlowGraph::expressionClassToBlockId(
+ ExpressionClasses::Id _id,
+ ExpressionClasses& _exprClasses
+)
+{
+ ExpressionClasses::Expression expr = _exprClasses.representative(_id);
+ if (expr.item && expr.item->type() == PushTag)
+ return BlockId(expr.item->data());
+ else
+ return BlockId::invalid();
+}
+
BlockId ControlFlowGraph::generateNewId()
{
BlockId id = BlockId(++m_lastUsedId);
diff --git a/ControlFlowGraph.h b/ControlFlowGraph.h
index 5d16df32..4310d664 100644
--- a/ControlFlowGraph.h
+++ b/ControlFlowGraph.h
@@ -24,16 +24,17 @@
#pragma once
#include <vector>
+#include <memory>
#include <libdevcore/Common.h>
#include <libdevcore/Assertions.h>
+#include <libevmasm/ExpressionClasses.h>
namespace dev
{
namespace eth
{
-class AssemblyItem;
-using AssemblyItems = std::vector<AssemblyItem>;
+class KnownState;
/**
* Identifier for a block, coincides with the tag number of an AssemblyItem but adds a special
@@ -69,14 +70,20 @@ struct BasicBlock
unsigned end = 0;
/// Tags pushed inside this block, with multiplicity.
std::vector<BlockId> pushedTags;
- /// ID of the block that always follows this one (either JUMP or flow into new block),
- /// or BlockId::invalid() otherwise
+ /// ID of the block that always follows this one (either non-branching part of JUMPI or flow
+ /// into new block), or BlockId::invalid() otherwise
BlockId next = BlockId::invalid();
- /// ID of the block that has to precede this one.
+ /// ID of the block that has to precede this one (because control flows into it).
BlockId prev = BlockId::invalid();
enum class EndType { JUMP, JUMPI, STOP, HANDOVER };
EndType endType = EndType::HANDOVER;
+
+ /// Knowledge about the state when this block is entered. Intersection of all possible ways
+ /// to enter this block.
+ std::shared_ptr<KnownState> startState;
+ /// Knowledge about the state at the end of this block.
+ std::shared_ptr<KnownState> endState;
};
class ControlFlowGraph
@@ -93,9 +100,14 @@ private:
void splitBlocks();
void resolveNextLinks();
void removeUnusedBlocks();
+ void gatherKnowledge();
void setPrevLinks();
AssemblyItems rebuildCode();
+ /// @returns the corresponding BlockId if _id is a pushed jump tag,
+ /// and an invalid BlockId otherwise.
+ BlockId expressionClassToBlockId(ExpressionClasses::Id _id, ExpressionClasses& _exprClasses);
+
BlockId generateNewId();
unsigned m_lastUsedId = 0;
diff --git a/KnownState.cpp b/KnownState.cpp
index 632777c8..7ff0143e 100644
--- a/KnownState.cpp
+++ b/KnownState.cpp
@@ -30,16 +30,18 @@ using namespace std;
using namespace dev;
using namespace dev::eth;
-ostream& KnownState::stream(
- ostream& _out,
- map<int, Id> _initialStack,
- map<int, Id> _targetStack
-) const
+ostream& KnownState::stream(ostream& _out) const
{
auto streamExpressionClass = [this](ostream& _out, Id _id)
{
auto const& expr = m_expressionClasses->representative(_id);
- _out << " " << dec << _id << ": " << *expr.item;
+ _out << " " << dec << _id << ": ";
+ if (!expr.item)
+ _out << " no item";
+ else if (expr.item->type() == UndefinedItem)
+ _out << " unknown " << int(expr.item->data());
+ else
+ _out << *expr.item;
if (expr.sequenceNumber)
_out << "@" << dec << expr.sequenceNumber;
_out << "(";
@@ -48,22 +50,32 @@ ostream& KnownState::stream(
_out << ")" << endl;
};
- _out << "Optimizer analysis:" << endl;
- _out << "Final stack height: " << dec << m_stackHeight << endl;
+ _out << "=== State ===" << endl;
+ _out << "Stack height: " << dec << m_stackHeight << endl;
_out << "Equivalence classes: " << endl;
for (Id eqClass = 0; eqClass < m_expressionClasses->size(); ++eqClass)
streamExpressionClass(_out, eqClass);
- _out << "Initial stack: " << endl;
- for (auto const& it: _initialStack)
+ _out << "Stack: " << endl;
+ for (auto const& it: m_stackElements)
{
_out << " " << dec << it.first << ": ";
streamExpressionClass(_out, it.second);
}
- _out << "Target stack: " << endl;
- for (auto const& it: _targetStack)
+ _out << "Storage: " << endl;
+ for (auto const& it: m_storageContent)
{
- _out << " " << dec << it.first << ": ";
+ _out << " ";
+ streamExpressionClass(_out, it.first);
+ _out << ": ";
+ streamExpressionClass(_out, it.second);
+ }
+ _out << "Memory: " << endl;
+ for (auto const& it: m_memoryContent)
+ {
+ _out << " ";
+ streamExpressionClass(_out, it.first);
+ _out << ": ";
streamExpressionClass(_out, it.second);
}
@@ -73,7 +85,11 @@ ostream& KnownState::stream(
KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool _copyItem)
{
StoreOperation op;
- if (_item.type() != Operation)
+ if (_item.type() == Tag)
+ {
+ // can be ignored
+ }
+ else if (_item.type() != Operation)
{
assertThrow(_item.deposit() == 1, InvalidDeposit, "");
setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem));
@@ -127,17 +143,40 @@ KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool
resetMemory();
if (SemanticInformation::invalidatesStorage(_item.instruction()))
resetStorage();
- setStackElement(
- m_stackHeight + _item.deposit(),
- m_expressionClasses->find(_item, arguments, _copyItem)
- );
+ assertThrow(info.ret <= 1, InvalidDeposit, "");
+ if (info.ret == 1)
+ setStackElement(
+ m_stackHeight + _item.deposit(),
+ m_expressionClasses->find(_item, arguments, _copyItem)
+ );
}
}
+ for (int p = m_stackHeight; p > m_stackHeight + _item.deposit(); --p)
+ m_stackElements.erase(p);
m_stackHeight += _item.deposit();
}
return op;
}
+void KnownState::reduceToCommonKnowledge(KnownState const& /*_other*/)
+{
+ //@todo
+ *this = KnownState(m_expressionClasses);
+}
+
+bool KnownState::operator==(const KnownState& _other) const
+{
+ //@todo
+ return (
+ m_stackElements.empty() &&
+ _other.m_stackElements.empty() &&
+ m_storageContent.empty() &&
+ _other.m_storageContent.empty() &&
+ m_memoryContent.empty() &&
+ _other.m_memoryContent.empty()
+ );
+}
+
ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location)
{
if (m_stackElements.count(_stackHeight))
diff --git a/KnownState.h b/KnownState.h
index c6dfcee6..f7a3dd67 100644
--- a/KnownState.h
+++ b/KnownState.h
@@ -27,6 +27,7 @@
#include <map>
#include <set>
#include <tuple>
+#include <memory>
#include <ostream>
#include <libdevcore/CommonIO.h>
#include <libdevcore/Exceptions.h>
@@ -70,14 +71,14 @@ public:
Id expression;
};
- KnownState(): m_expressionClasses(std::make_shared<ExpressionClasses>()) {}
+ explicit KnownState(
+ std::shared_ptr<ExpressionClasses> _expressionClasses = std::make_shared<ExpressionClasses>()
+ ): m_expressionClasses(_expressionClasses)
+ {
+ }
/// Streams debugging information to @a _out.
- std::ostream& stream(
- std::ostream& _out,
- std::map<int, Id> _initialStack = std::map<int, Id>(),
- std::map<int, Id> _targetStack = std::map<int, Id>()
- ) const;
+ std::ostream& stream(std::ostream& _out) const;
/// Feeds the item into the system for analysis.
/// @returns a possible store operation
@@ -92,6 +93,20 @@ public:
/// Resets any knowledge.
void reset() { resetStorage(); resetMemory(); resetStack(); }
+ /// Manually increments the storage and memory sequence number.
+ void incrementSequenceNumber() { m_sequenceNumber += 2; }
+
+ /// Replaces the state by the intersection with _other, i.e. only equal knowledge is retained.
+ /// If the stack heighht is different, the smaller one is used and the stack is compared
+ /// relatively.
+ void reduceToCommonKnowledge(KnownState const& _other);
+
+ /// @returns a shared pointer to a copy of this state.
+ std::shared_ptr<KnownState> copy() const { return std::make_shared<KnownState>(*this); }
+
+ /// @returns true if the knowledge about the state of both objects is (known to be) equal.
+ bool operator==(KnownState const& _other) const;
+
///@todo the sequence numbers in two copies of this class should never be the same.
/// might be doable using two-dimensional sequence numbers, where the first value is incremented
/// for each copy
@@ -99,8 +114,7 @@ public:
/// Retrieves the current equivalence class fo the given stack element (or generates a new
/// one if it does not exist yet).
Id stackElement(int _stackHeight, SourceLocation const& _location);
- /// @returns the equivalence class id of the special initial stack element at the given height
- /// (must not be positive).
+ /// @returns the equivalence class id of the special initial stack element at the given height.
Id initialStackElement(int _stackHeight, SourceLocation const& _location);
int stackHeight() const { return m_stackHeight; }
diff --git a/SemanticInformation.cpp b/SemanticInformation.cpp
index 40c36f9e..056162b5 100644
--- a/SemanticInformation.cpp
+++ b/SemanticInformation.cpp
@@ -128,7 +128,6 @@ bool SemanticInformation::isDeterministic(AssemblyItem const& _item)
{
if (_item.type() != Operation)
return true;
- assertThrow(!altersControlFlow(_item), OptimizerException, "");
switch (_item.instruction())
{
diff --git a/SemanticInformation.h b/SemanticInformation.h
index b14ddb65..094f4591 100644
--- a/SemanticInformation.h
+++ b/SemanticInformation.h
@@ -48,7 +48,6 @@ struct SemanticInformation
static bool altersControlFlow(AssemblyItem const& _item);
/// @returns false if the value put on the stack by _item depends on anything else than
/// the information in the current block header, memory, storage or stack.
- /// @note should not be called for instructions that alter the control flow.
static bool isDeterministic(AssemblyItem const& _item);
/// @returns true if the given instruction modifies memory.
static bool invalidatesMemory(Instruction _instruction);