aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorchriseth <c@ethdev.com>2015-05-09 00:58:42 +0800
committerchriseth <c@ethdev.com>2015-05-09 00:58:42 +0800
commit6cc71a188f3c59b32ac1f131ee74c703f1f81a70 (patch)
treef5f228b61fd53b6072db548c705463d05e26e2be
parent4d62c463d143c93f7938db5b8f7d01d33aa1a698 (diff)
parent1dfcb4735011dfaa143d6592713ec6b4bf097934 (diff)
downloaddexon-solidity-6cc71a188f3c59b32ac1f131ee74c703f1f81a70.tar.gz
dexon-solidity-6cc71a188f3c59b32ac1f131ee74c703f1f81a70.tar.zst
dexon-solidity-6cc71a188f3c59b32ac1f131ee74c703f1f81a70.zip
Merge pull request #1813 from chriseth/sol_knowledgeEngine
Static Analysis Engine.
-rw-r--r--Assembly.cpp16
-rw-r--r--CommonSubexpressionEliminator.cpp271
-rw-r--r--CommonSubexpressionEliminator.h63
-rw-r--r--ControlFlowGraph.cpp109
-rw-r--r--ControlFlowGraph.h32
-rw-r--r--ExpressionClasses.cpp44
-rw-r--r--ExpressionClasses.h4
-rw-r--r--KnownState.cpp326
-rw-r--r--KnownState.h163
-rw-r--r--SemanticInformation.cpp53
-rw-r--r--SemanticInformation.h8
11 files changed, 742 insertions, 347 deletions
diff --git a/Assembly.cpp b/Assembly.cpp
index 6cc09a4b..9530ded4 100644
--- a/Assembly.cpp
+++ b/Assembly.cpp
@@ -304,9 +304,6 @@ Assembly& Assembly::optimise(bool _enable)
{
if (!_enable)
return *this;
- std::vector<pair<AssemblyItems, function<AssemblyItems(AssemblyItemsConstRef)>>> rules;
- // jump to next instruction
- rules.push_back({ { PushTag, Instruction::JUMP, Tag }, [](AssemblyItemsConstRef m) -> AssemblyItems { if (m[0].data() == m[2].data()) return {m[2]}; else return m.toVector(); }});
unsigned total = 0;
for (unsigned count = 1; count > 0; total += count)
@@ -314,10 +311,17 @@ Assembly& Assembly::optimise(bool _enable)
copt << toString(*this);
count = 0;
+ //@todo CFG interface should be a generator, that returns an item and a pointer to a
+ // knownstate, which has to replace the current state if it is not null.
+ // Feed these items to the CSE, but also store them and replace the stored version
+ // if the items generated by the CSE are shorter. (or even use less gas?)
copt << "Performing control flow analysis...";
{
ControlFlowGraph cfg(m_items);
- AssemblyItems optItems = cfg.optimisedItems();
+ AssemblyItems optItems;
+ for (BasicBlock const& block: cfg.optimisedBlocks())
+ copy(m_items.begin() + block.begin, m_items.begin() + block.end,
+ back_inserter(optItems));
if (optItems.size() < m_items.size())
{
copt << "Old size: " << m_items.size() << ", new size: " << optItems.size();
@@ -329,7 +333,9 @@ Assembly& Assembly::optimise(bool _enable)
copt << "Performing common subexpression elimination...";
for (auto iter = m_items.begin(); iter != m_items.end();)
{
- CommonSubexpressionEliminator eliminator;
+ //@todo use only a single state / expression classes instance.
+ KnownState state(make_shared<ExpressionClasses>());
+ CommonSubexpressionEliminator eliminator(state);
auto orig = iter;
iter = eliminator.feedItems(iter, m_items.end());
AssemblyItems optItems;
diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp
index 63524d6f..4b85eba4 100644
--- a/CommonSubexpressionEliminator.cpp
+++ b/CommonSubexpressionEliminator.cpp
@@ -37,18 +37,19 @@ vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems()
map<int, Id> initialStackContents;
map<int, Id> targetStackContents;
- int minHeight = m_stackHeight + 1;
- if (!m_stackElements.empty())
- minHeight = min(minHeight, m_stackElements.begin()->first);
- for (int height = minHeight; height <= 0; ++height)
- initialStackContents[height] = initialStackElement(height, SourceLocation());
- for (int height = minHeight; height <= m_stackHeight; ++height)
- targetStackContents[height] = stackElement(height, SourceLocation());
+ int minHeight = m_state.stackHeight() + 1;
+ if (!m_state.stackElements().empty())
+ minHeight = min(minHeight, m_state.stackElements().begin()->first);
+ for (int height = minHeight; height <= m_initialState.stackHeight(); ++height)
+ initialStackContents[height] = m_initialState.stackElement(height, SourceLocation());
+ for (int height = minHeight; height <= m_state.stackHeight(); ++height)
+ targetStackContents[height] = m_state.stackElement(height, SourceLocation());
// Debug info:
//stream(cout, initialStackContents, targetStackContents);
- AssemblyItems items = CSECodeGenerator(m_expressionClasses, m_storeOperations).generateCode(
+ AssemblyItems items = CSECodeGenerator(m_state.expressionClasses(), m_storeOperations).generateCode(
+ m_initialState.stackHeight(),
initialStackContents,
targetStackContents
);
@@ -57,103 +58,11 @@ vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems()
return items;
}
-ostream& CommonSubexpressionEliminator::stream(
- ostream& _out,
- map<int, Id> _initialStack,
- map<int, Id> _targetStack
-) const
-{
- auto streamExpressionClass = [this](ostream& _out, Id _id)
- {
- auto const& expr = m_expressionClasses.representative(_id);
- _out << " " << dec << _id << ": " << *expr.item;
- if (expr.sequenceNumber)
- _out << "@" << dec << expr.sequenceNumber;
- _out << "(";
- for (Id arg: expr.arguments)
- _out << dec << arg << ",";
- _out << ")" << endl;
- };
-
- _out << "Optimizer analysis:" << endl;
- _out << "Final stack height: " << dec << m_stackHeight << endl;
- _out << "Equivalence classes: " << endl;
- for (Id eqClass = 0; eqClass < m_expressionClasses.size(); ++eqClass)
- streamExpressionClass(_out, eqClass);
-
- _out << "Initial stack: " << endl;
- for (auto const& it: _initialStack)
- {
- _out << " " << dec << it.first << ": ";
- streamExpressionClass(_out, it.second);
- }
- _out << "Target stack: " << endl;
- for (auto const& it: _targetStack)
- {
- _out << " " << dec << it.first << ": ";
- streamExpressionClass(_out, it.second);
- }
-
- return _out;
-}
-
void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _copyItem)
{
- if (_item.type() != Operation)
- {
- assertThrow(_item.deposit() == 1, InvalidDeposit, "");
- setStackElement(++m_stackHeight, m_expressionClasses.find(_item, {}, _copyItem));
- }
- else
- {
- Instruction instruction = _item.instruction();
- InstructionInfo info = instructionInfo(instruction);
- if (SemanticInformation::isDupInstruction(_item))
- setStackElement(
- m_stackHeight + 1,
- stackElement(
- m_stackHeight - int(instruction) + int(Instruction::DUP1),
- _item.getLocation()
- )
- );
- else if (SemanticInformation::isSwapInstruction(_item))
- swapStackElements(
- m_stackHeight,
- m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1),
- _item.getLocation()
- );
- else if (instruction != Instruction::POP)
- {
- vector<Id> arguments(info.args);
- for (int i = 0; i < info.args; ++i)
- arguments[i] = stackElement(m_stackHeight - i, _item.getLocation());
- if (_item.instruction() == Instruction::SSTORE)
- storeInStorage(arguments[0], arguments[1], _item.getLocation());
- else if (_item.instruction() == Instruction::SLOAD)
- setStackElement(
- m_stackHeight + _item.deposit(),
- loadFromStorage(arguments[0], _item.getLocation())
- );
- else if (_item.instruction() == Instruction::MSTORE)
- storeInMemory(arguments[0], arguments[1], _item.getLocation());
- else if (_item.instruction() == Instruction::MLOAD)
- setStackElement(
- m_stackHeight + _item.deposit(),
- loadFromMemory(arguments[0], _item.getLocation())
- );
- else if (_item.instruction() == Instruction::SHA3)
- setStackElement(
- m_stackHeight + _item.deposit(),
- applySha3(arguments.at(0), arguments.at(1), _item.getLocation())
- );
- else
- setStackElement(
- m_stackHeight + _item.deposit(),
- m_expressionClasses.find(_item, arguments, _copyItem)
- );
- }
- m_stackHeight += _item.deposit();
- }
+ StoreOperation op = m_state.feedItem(_item, _copyItem);
+ if (op.isValid())
+ m_storeOperations.push_back(op);
}
void CommonSubexpressionEliminator::optimizeBreakingItem()
@@ -164,20 +73,20 @@ void CommonSubexpressionEliminator::optimizeBreakingItem()
SourceLocation const& location = m_breakingItem->getLocation();
AssemblyItem::JumpType jumpType = m_breakingItem->getJumpType();
- Id condition = stackElement(m_stackHeight - 1, location);
- Id zero = m_expressionClasses.find(u256(0));
- if (m_expressionClasses.knownToBeDifferent(condition, zero))
+ Id condition = m_state.stackElement(m_state.stackHeight() - 1, location);
+ Id zero = m_state.expressionClasses().find(u256(0));
+ if (m_state.expressionClasses().knownToBeDifferent(condition, zero))
{
feedItem(AssemblyItem(Instruction::SWAP1, location), true);
feedItem(AssemblyItem(Instruction::POP, location), true);
AssemblyItem item(Instruction::JUMP, location);
item.setJumpType(jumpType);
- m_breakingItem = m_expressionClasses.storeItem(item);
+ m_breakingItem = m_state.expressionClasses().storeItem(item);
return;
}
- Id negatedCondition = m_expressionClasses.find(Instruction::ISZERO, {condition});
- if (m_expressionClasses.knownToBeDifferent(negatedCondition, zero))
+ Id negatedCondition = m_state.expressionClasses().find(Instruction::ISZERO, {condition});
+ if (m_state.expressionClasses().knownToBeDifferent(negatedCondition, zero))
{
AssemblyItem it(Instruction::POP, location);
feedItem(it, true);
@@ -186,148 +95,6 @@ void CommonSubexpressionEliminator::optimizeBreakingItem()
}
}
-void CommonSubexpressionEliminator::setStackElement(int _stackHeight, Id _class)
-{
- m_stackElements[_stackHeight] = _class;
-}
-
-void CommonSubexpressionEliminator::swapStackElements(
- int _stackHeightA,
- int _stackHeightB,
- SourceLocation const& _location
-)
-{
- assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements.");
- // ensure they are created
- stackElement(_stackHeightA, _location);
- stackElement(_stackHeightB, _location);
-
- swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]);
-}
-
-ExpressionClasses::Id CommonSubexpressionEliminator::stackElement(
- int _stackHeight,
- SourceLocation const& _location
-)
-{
- if (m_stackElements.count(_stackHeight))
- return m_stackElements.at(_stackHeight);
- // Stack element not found (not assigned yet), create new equivalence class.
- return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location);
-}
-
-ExpressionClasses::Id CommonSubexpressionEliminator::initialStackElement(
- int _stackHeight,
- SourceLocation const& _location
-)
-{
- assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested.");
- assertThrow(_stackHeight > -16, StackTooDeepException, "");
- // This is a special assembly item that refers to elements pre-existing on the initial stack.
- return m_expressionClasses.find(AssemblyItem(dupInstruction(1 - _stackHeight), _location));
-}
-
-void CommonSubexpressionEliminator::storeInStorage(Id _slot, Id _value, SourceLocation const& _location)
-{
- if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value)
- // do not execute the storage if we know that the value is already there
- return;
- m_sequenceNumber++;
- decltype(m_storageContent) storageContents;
- // Copy over all values (i.e. retain knowledge about them) where we know that this store
- // operation will not destroy the knowledge. Specifically, we copy storage locations we know
- // are different from _slot or locations where we know that the stored value is equal to _value.
- for (auto const& storageItem: m_storageContent)
- if (m_expressionClasses.knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value)
- storageContents.insert(storageItem);
- m_storageContent = move(storageContents);
-
- AssemblyItem item(Instruction::SSTORE, _location);
- Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber);
- m_storeOperations.push_back(StoreOperation(StoreOperation::Storage, _slot, m_sequenceNumber, id));
- m_storageContent[_slot] = _value;
- // increment a second time so that we get unique sequence numbers for writes
- m_sequenceNumber++;
-}
-
-ExpressionClasses::Id CommonSubexpressionEliminator::loadFromStorage(Id _slot, SourceLocation const& _location)
-{
- if (m_storageContent.count(_slot))
- return m_storageContent.at(_slot);
-
- AssemblyItem item(Instruction::SLOAD, _location);
- return m_storageContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber);
-}
-
-void CommonSubexpressionEliminator::storeInMemory(Id _slot, Id _value, SourceLocation const& _location)
-{
- if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value)
- // do not execute the store if we know that the value is already there
- return;
- m_sequenceNumber++;
- decltype(m_memoryContent) memoryContents;
- // copy over values at points where we know that they are different from _slot by at least 32
- for (auto const& memoryItem: m_memoryContent)
- if (m_expressionClasses.knownToBeDifferentBy32(memoryItem.first, _slot))
- memoryContents.insert(memoryItem);
- m_memoryContent = move(memoryContents);
-
- AssemblyItem item(Instruction::MSTORE, _location);
- Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber);
- m_storeOperations.push_back(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id));
- m_memoryContent[_slot] = _value;
- // increment a second time so that we get unique sequence numbers for writes
- m_sequenceNumber++;
-}
-
-ExpressionClasses::Id CommonSubexpressionEliminator::loadFromMemory(Id _slot, SourceLocation const& _location)
-{
- if (m_memoryContent.count(_slot))
- return m_memoryContent.at(_slot);
-
- AssemblyItem item(Instruction::MLOAD, _location);
- return m_memoryContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber);
-}
-
-CommonSubexpressionEliminator::Id CommonSubexpressionEliminator::applySha3(
- Id _start,
- Id _length,
- SourceLocation const& _location
-)
-{
- AssemblyItem sha3Item(Instruction::SHA3, _location);
- // Special logic if length is a short constant, otherwise we cannot tell.
- u256 const* l = m_expressionClasses.knownConstant(_length);
- // unknown or too large length
- if (!l || *l > 128)
- return m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber);
-
- vector<Id> arguments;
- for (u256 i = 0; i < *l; i += 32)
- {
- Id slot = m_expressionClasses.find(
- AssemblyItem(Instruction::ADD, _location),
- {_start, m_expressionClasses.find(i)}
- );
- arguments.push_back(loadFromMemory(slot, _location));
- }
- if (m_knownSha3Hashes.count(arguments))
- return m_knownSha3Hashes.at(arguments);
- Id v;
- // If all arguments are known constants, compute the sha3 here
- if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses.knownConstant(_a); }))
- {
- bytes data;
- for (Id a: arguments)
- data += toBigEndian(*m_expressionClasses.knownConstant(a));
- data.resize(size_t(*l));
- v = m_expressionClasses.find(AssemblyItem(u256(sha3(data)), _location));
- }
- else
- v = m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber);
- return m_knownSha3Hashes[arguments] = v;
-}
-
CSECodeGenerator::CSECodeGenerator(
ExpressionClasses& _expressionClasses,
vector<CSECodeGenerator::StoreOperation> const& _storeOperations
@@ -339,10 +106,12 @@ CSECodeGenerator::CSECodeGenerator(
}
AssemblyItems CSECodeGenerator::generateCode(
+ int _initialStackHeight,
map<int, Id> const& _initialStack,
map<int, Id> const& _targetStackContents
)
{
+ m_stackHeight = _initialStackHeight;
m_stack = _initialStack;
for (auto const& item: m_stack)
if (!m_classPositions.count(item.second))
diff --git a/CommonSubexpressionEliminator.h b/CommonSubexpressionEliminator.h
index 6156bc81..6e1ba40b 100644
--- a/CommonSubexpressionEliminator.h
+++ b/CommonSubexpressionEliminator.h
@@ -32,6 +32,7 @@
#include <libdevcore/Exceptions.h>
#include <libevmasm/ExpressionClasses.h>
#include <libevmasm/SemanticInformation.h>
+#include <libevmasm/KnownState.h>
namespace dev
{
@@ -58,20 +59,9 @@ class CommonSubexpressionEliminator
{
public:
using Id = ExpressionClasses::Id;
- struct StoreOperation
- {
- enum Target { Memory, Storage };
- StoreOperation(
- Target _target,
- Id _slot,
- unsigned _sequenceNumber,
- Id _expression
- ): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {}
- Target target;
- Id slot;
- unsigned sequenceNumber;
- Id expression;
- };
+ using StoreOperation = KnownState::StoreOperation;
+
+ CommonSubexpressionEliminator(KnownState const& _state): m_initialState(_state), m_state(_state) {}
/// Feeds AssemblyItems into the eliminator and @returns the iterator pointing at the first
/// item that must be fed into a new instance of the eliminator.
@@ -95,49 +85,11 @@ private:
/// Tries to optimize the item that breaks the basic block at the end.
void optimizeBreakingItem();
- /// Simplifies the given item using
- /// Assigns a new equivalence class to the next sequence number of the given stack element.
- void setStackElement(int _stackHeight, Id _class);
- /// Swaps the given stack elements in their next sequence number.
- void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location);
- /// Retrieves the current equivalence class fo the given stack element (or generates a new
- /// one if it does not exist yet).
- Id stackElement(int _stackHeight, SourceLocation const& _location);
- /// @returns the equivalence class id of the special initial stack element at the given height
- /// (must not be positive).
- Id initialStackElement(int _stackHeight, SourceLocation const& _location);
-
- /// Increments the sequence number, deletes all storage information that might be overwritten
- /// and stores the new value at the given slot.
- void storeInStorage(Id _slot, Id _value, SourceLocation const& _location);
- /// Retrieves the current value at the given slot in storage or creates a new special sload class.
- Id loadFromStorage(Id _slot, SourceLocation const& _location);
- /// Increments the sequence number, deletes all memory information that might be overwritten
- /// and stores the new value at the given slot.
- void storeInMemory(Id _slot, Id _value, SourceLocation const& _location);
- /// Retrieves the current value at the given slot in memory or creates a new special mload class.
- Id loadFromMemory(Id _slot, SourceLocation const& _location);
- /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory.
- Id applySha3(Id _start, Id _length, SourceLocation const& _location);
-
- /// Current stack height, can be negative.
- int m_stackHeight = 0;
- /// Current stack layout, mapping stack height -> equivalence class
- std::map<int, Id> m_stackElements;
- /// Current sequence number, this is incremented with each modification to storage or memory.
- unsigned m_sequenceNumber = 1;
- /// Knowledge about storage content.
- std::map<Id, Id> m_storageContent;
- /// Knowledge about memory content. Keys are memory addresses, note that the values overlap
- /// and are not contained here if they are not completely known.
- std::map<Id, Id> m_memoryContent;
- /// Keeps record of all sha3 hashes that are computed.
- std::map<std::vector<Id>, Id> m_knownSha3Hashes;
+ KnownState m_initialState;
+ KnownState m_state;
/// Keeps information about which storage or memory slots were written to at which sequence
/// number with what instruction.
std::vector<StoreOperation> m_storeOperations;
- /// Structure containing the classes of equivalent expressions.
- ExpressionClasses m_expressionClasses;
/// The item that breaks the basic block, can be nullptr.
/// It is usually appended to the block but can be optimized in some cases.
@@ -164,6 +116,7 @@ public:
/// @param _targetStackContents final contents of the stack, by stack height relative to initial
/// @note should only be called once on each object.
AssemblyItems generateCode(
+ int _initialStackHeight,
std::map<int, Id> const& _initialStack,
std::map<int, Id> const& _targetStackContents
);
@@ -199,7 +152,7 @@ private:
AssemblyItems m_generatedItems;
/// Current height of the stack relative to the start.
- int m_stackHeight = 0;
+ int m_stackHeight;
/// If (b, a) is in m_requests then b is needed to compute a.
std::multimap<Id, Id> m_neededBy;
/// Current content of the stack.
diff --git a/ControlFlowGraph.cpp b/ControlFlowGraph.cpp
index cc4367e6..2e28317a 100644
--- a/ControlFlowGraph.cpp
+++ b/ControlFlowGraph.cpp
@@ -23,9 +23,11 @@
#include <libevmasm/ControlFlowGraph.h>
#include <map>
+#include <memory>
#include <libevmasm/Exceptions.h>
#include <libevmasm/AssemblyItem.h>
#include <libevmasm/SemanticInformation.h>
+#include <libevmasm/KnownState.h>
using namespace std;
using namespace dev;
@@ -36,16 +38,17 @@ BlockId::BlockId(u256 const& _id): m_id(_id)
assertThrow( _id < initial().m_id, OptimizerException, "Tag number too large.");
}
-AssemblyItems ControlFlowGraph::optimisedItems()
+BasicBlocks ControlFlowGraph::optimisedBlocks()
{
if (m_items.empty())
- return m_items;
+ return BasicBlocks();
findLargestTag();
splitBlocks();
resolveNextLinks();
removeUnusedBlocks();
setPrevLinks();
+ gatherKnowledge();
return rebuildCode();
}
@@ -209,7 +212,78 @@ void ControlFlowGraph::setPrevLinks()
}
}
-AssemblyItems ControlFlowGraph::rebuildCode()
+void ControlFlowGraph::gatherKnowledge()
+{
+ // @todo actually we know that memory is filled with zeros at the beginning,
+ // we could make use of that.
+ KnownStatePointer emptyState = make_shared<KnownState>();
+ ExpressionClasses& expr = emptyState->expressionClasses();
+ bool unknownJumpEncountered = false;
+
+ vector<pair<BlockId, KnownStatePointer>> workQueue({make_pair(BlockId::initial(), emptyState->copy())});
+ while (!workQueue.empty())
+ {
+ //@todo we might have to do something like incrementing the sequence number for each JUMPDEST
+ assertThrow(!!workQueue.back().first, OptimizerException, "");
+ BasicBlock& block = m_blocks.at(workQueue.back().first);
+ KnownStatePointer state = workQueue.back().second;
+ workQueue.pop_back();
+ if (block.startState)
+ {
+ state->reduceToCommonKnowledge(*block.startState);
+ if (*state == *block.startState)
+ continue;
+ }
+
+ block.startState = state->copy();
+ //@todo we might know the return address for the first pass, but not anymore for the second,
+ // -> store knowledge about tags as a union.
+
+ // Feed all items except for the final jump yet because it will erase the target tag.
+ unsigned pc = block.begin;
+ while (pc < block.end && !SemanticInformation::altersControlFlow(m_items.at(pc)))
+ state->feedItem(m_items.at(pc++));
+
+ if (
+ block.endType == BasicBlock::EndType::JUMP ||
+ block.endType == BasicBlock::EndType::JUMPI
+ )
+ {
+ assertThrow(block.begin <= pc && pc == block.end - 1, OptimizerException, "");
+ //@todo in the case of JUMPI, add knowledge about the condition to the state
+ // (for both values of the condition)
+ BlockId nextBlock = expressionClassToBlockId(
+ state->stackElement(state->stackHeight(), SourceLocation()),
+ expr
+ );
+ state->feedItem(m_items.at(pc++));
+ if (nextBlock)
+ workQueue.push_back(make_pair(nextBlock, state->copy()));
+ else if (!unknownJumpEncountered)
+ {
+ // We do not know where this jump goes, so we have to reset the states of all
+ // JUMPDESTs.
+ unknownJumpEncountered = true;
+ for (auto const& it: m_blocks)
+ if (it.second.begin < it.second.end && m_items[it.second.begin].type() == Tag)
+ workQueue.push_back(make_pair(it.first, emptyState->copy()));
+ }
+ }
+ else if (block.begin <= pc && pc < block.end)
+ state->feedItem(m_items.at(pc++));
+ assertThrow(block.end <= block.begin || pc == block.end, OptimizerException, "");
+
+ block.endState = state;
+
+ if (
+ block.endType == BasicBlock::EndType::HANDOVER ||
+ block.endType == BasicBlock::EndType::JUMPI
+ )
+ workQueue.push_back(make_pair(block.next, state->copy()));
+ }
+}
+
+BasicBlocks ControlFlowGraph::rebuildCode()
{
map<BlockId, unsigned> pushes;
for (auto& idAndBlock: m_blocks)
@@ -220,7 +294,7 @@ AssemblyItems ControlFlowGraph::rebuildCode()
for (auto it: m_blocks)
blocksToAdd.insert(it.first);
set<BlockId> blocksAdded;
- AssemblyItems code;
+ BasicBlocks blocks;
for (
BlockId blockId = BlockId::initial();
@@ -233,23 +307,34 @@ AssemblyItems ControlFlowGraph::rebuildCode()
blockId = m_blocks.at(blockId).prev;
for (; blockId; blockId = m_blocks.at(blockId).next)
{
- BasicBlock const& block = m_blocks.at(blockId);
+ BasicBlock& block = m_blocks.at(blockId);
blocksToAdd.erase(blockId);
blocksAdded.insert(blockId);
- auto begin = m_items.begin() + block.begin;
- auto end = m_items.begin() + block.end;
- if (begin == end)
+ if (block.begin == block.end)
continue;
// If block starts with unused tag, skip it.
- if (previousHandedOver && !pushes[blockId] && begin->type() == Tag)
- ++begin;
+ if (previousHandedOver && !pushes[blockId] && m_items[block.begin].type() == Tag)
+ ++block.begin;
+ if (block.begin < block.end)
+ blocks.push_back(block);
previousHandedOver = (block.endType == BasicBlock::EndType::HANDOVER);
- copy(begin, end, back_inserter(code));
}
}
- return code;
+ return blocks;
+}
+
+BlockId ControlFlowGraph::expressionClassToBlockId(
+ ExpressionClasses::Id _id,
+ ExpressionClasses& _exprClasses
+)
+{
+ ExpressionClasses::Expression expr = _exprClasses.representative(_id);
+ if (expr.item && expr.item->type() == PushTag)
+ return BlockId(expr.item->data());
+ else
+ return BlockId::invalid();
}
BlockId ControlFlowGraph::generateNewId()
diff --git a/ControlFlowGraph.h b/ControlFlowGraph.h
index 5d16df32..3366dc45 100644
--- a/ControlFlowGraph.h
+++ b/ControlFlowGraph.h
@@ -24,16 +24,18 @@
#pragma once
#include <vector>
+#include <memory>
#include <libdevcore/Common.h>
#include <libdevcore/Assertions.h>
+#include <libevmasm/ExpressionClasses.h>
namespace dev
{
namespace eth
{
-class AssemblyItem;
-using AssemblyItems = std::vector<AssemblyItem>;
+class KnownState;
+using KnownStatePointer = std::shared_ptr<KnownState>;
/**
* Identifier for a block, coincides with the tag number of an AssemblyItem but adds a special
@@ -69,32 +71,46 @@ struct BasicBlock
unsigned end = 0;
/// Tags pushed inside this block, with multiplicity.
std::vector<BlockId> pushedTags;
- /// ID of the block that always follows this one (either JUMP or flow into new block),
- /// or BlockId::invalid() otherwise
+ /// ID of the block that always follows this one (either non-branching part of JUMPI or flow
+ /// into new block), or BlockId::invalid() otherwise
BlockId next = BlockId::invalid();
- /// ID of the block that has to precede this one.
+ /// ID of the block that has to precede this one (because control flows into it).
BlockId prev = BlockId::invalid();
enum class EndType { JUMP, JUMPI, STOP, HANDOVER };
EndType endType = EndType::HANDOVER;
+
+ /// Knowledge about the state when this block is entered. Intersection of all possible ways
+ /// to enter this block.
+ KnownStatePointer startState;
+ /// Knowledge about the state at the end of this block.
+ KnownStatePointer endState;
};
+using BasicBlocks = std::vector<BasicBlock>;
+
class ControlFlowGraph
{
public:
/// Initializes the control flow graph.
/// @a _items has to persist across the usage of this class.
ControlFlowGraph(AssemblyItems const& _items): m_items(_items) {}
- /// @returns the collection of optimised items, should be called only once.
- AssemblyItems optimisedItems();
+ /// @returns vector of basic blocks in the order they should be used in the final code.
+ /// Should be called only once.
+ BasicBlocks optimisedBlocks();
private:
void findLargestTag();
void splitBlocks();
void resolveNextLinks();
void removeUnusedBlocks();
+ void gatherKnowledge();
void setPrevLinks();
- AssemblyItems rebuildCode();
+ BasicBlocks rebuildCode();
+
+ /// @returns the corresponding BlockId if _id is a pushed jump tag,
+ /// and an invalid BlockId otherwise.
+ BlockId expressionClassToBlockId(ExpressionClasses::Id _id, ExpressionClasses& _exprClasses);
BlockId generateNewId();
diff --git a/ExpressionClasses.cpp b/ExpressionClasses.cpp
index 1e60a7fe..cfbeba7f 100644
--- a/ExpressionClasses.cpp
+++ b/ExpressionClasses.cpp
@@ -37,6 +37,7 @@ using namespace dev::eth;
bool ExpressionClasses::Expression::operator<(ExpressionClasses::Expression const& _other) const
{
+ assertThrow(!!item && !!_other.item, OptimizerException, "");
auto type = item->type();
auto otherType = _other.item->type();
return std::tie(type, item->data(), arguments, sequenceNumber) <
@@ -56,12 +57,15 @@ ExpressionClasses::Id ExpressionClasses::find(
exp.arguments = _arguments;
exp.sequenceNumber = _sequenceNumber;
- if (SemanticInformation::isCommutativeOperation(_item))
- sort(exp.arguments.begin(), exp.arguments.end());
+ if (SemanticInformation::isDeterministic(_item))
+ {
+ if (SemanticInformation::isCommutativeOperation(_item))
+ sort(exp.arguments.begin(), exp.arguments.end());
- auto it = m_expressions.find(exp);
- if (it != m_expressions.end())
- return it->id;
+ auto it = m_expressions.find(exp);
+ if (it != m_expressions.end())
+ return it->id;
+ }
if (_copyItem)
exp.item = storeItem(_item);
@@ -122,10 +126,16 @@ string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) const
{
Expression const& expr = representative(_id);
stringstream str;
- str << dec << expr.id << ":" << *expr.item << "(";
- for (Id arg: expr.arguments)
- str << fullDAGToString(arg) << ",";
- str << ")";
+ str << dec << expr.id << ":";
+ if (expr.item)
+ {
+ str << *expr.item << "(";
+ for (Id arg: expr.arguments)
+ str << fullDAGToString(arg) << ",";
+ str << ")";
+ }
+ else
+ str << " UNIQUE";
return str.str();
}
@@ -279,7 +289,11 @@ ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr,
{
static Rules rules;
- if (_expr.item->type() != Operation)
+ if (
+ !_expr.item ||
+ _expr.item->type() != Operation ||
+ !SemanticInformation::isDeterministic(*_expr.item)
+ )
return -1;
for (auto const& rule: rules.rules())
@@ -337,7 +351,7 @@ void Pattern::setMatchGroup(unsigned _group, map<unsigned, Expression const*>& _
bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const
{
- if (!matchesBaseItem(*_expr.item))
+ if (!matchesBaseItem(_expr.item))
return false;
if (m_matchGroup)
{
@@ -387,13 +401,15 @@ string Pattern::toString() const
return s.str();
}
-bool Pattern::matchesBaseItem(AssemblyItem const& _item) const
+bool Pattern::matchesBaseItem(AssemblyItem const* _item) const
{
if (m_type == UndefinedItem)
return true;
- if (m_type != _item.type())
+ if (!_item)
+ return false;
+ if (m_type != _item->type())
return false;
- if (m_requireDataMatch && m_data != _item.data())
+ if (m_requireDataMatch && m_data != _item->data())
return false;
return true;
}
diff --git a/ExpressionClasses.h b/ExpressionClasses.h
index 2f720f60..c8352030 100644
--- a/ExpressionClasses.h
+++ b/ExpressionClasses.h
@@ -50,7 +50,7 @@ public:
struct Expression
{
Id id;
- AssemblyItem const* item;
+ AssemblyItem const* item = nullptr;
Ids arguments;
unsigned sequenceNumber; ///< Storage modification sequence, only used for SLOAD/SSTORE instructions.
/// Behaves as if this was a tuple of (item->type(), item->data(), arguments, sequenceNumber).
@@ -149,7 +149,7 @@ public:
std::string toString() const;
private:
- bool matchesBaseItem(AssemblyItem const& _item) const;
+ bool matchesBaseItem(AssemblyItem const* _item) const;
Expression const& matchGroupValue() const;
AssemblyItemType m_type;
diff --git a/KnownState.cpp b/KnownState.cpp
new file mode 100644
index 00000000..41ac4802
--- /dev/null
+++ b/KnownState.cpp
@@ -0,0 +1,326 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file KnownState.cpp
+ * @author Christian <c@ethdev.com>
+ * @date 2015
+ * Contains knowledge about the state of the virtual machine at a specific instruction.
+ */
+
+#include "KnownState.h"
+#include <functional>
+#include <libdevcrypto/SHA3.h>
+#include <libevmasm/AssemblyItem.h>
+
+using namespace std;
+using namespace dev;
+using namespace dev::eth;
+
+ostream& KnownState::stream(ostream& _out) const
+{
+ auto streamExpressionClass = [this](ostream& _out, Id _id)
+ {
+ auto const& expr = m_expressionClasses->representative(_id);
+ _out << " " << dec << _id << ": ";
+ if (!expr.item)
+ _out << " no item";
+ else if (expr.item->type() == UndefinedItem)
+ _out << " unknown " << int(expr.item->data());
+ else
+ _out << *expr.item;
+ if (expr.sequenceNumber)
+ _out << "@" << dec << expr.sequenceNumber;
+ _out << "(";
+ for (Id arg: expr.arguments)
+ _out << dec << arg << ",";
+ _out << ")" << endl;
+ };
+
+ _out << "=== State ===" << endl;
+ _out << "Stack height: " << dec << m_stackHeight << endl;
+ _out << "Equivalence classes: " << endl;
+ for (Id eqClass = 0; eqClass < m_expressionClasses->size(); ++eqClass)
+ streamExpressionClass(_out, eqClass);
+
+ _out << "Stack: " << endl;
+ for (auto const& it: m_stackElements)
+ {
+ _out << " " << dec << it.first << ": ";
+ streamExpressionClass(_out, it.second);
+ }
+ _out << "Storage: " << endl;
+ for (auto const& it: m_storageContent)
+ {
+ _out << " ";
+ streamExpressionClass(_out, it.first);
+ _out << ": ";
+ streamExpressionClass(_out, it.second);
+ }
+ _out << "Memory: " << endl;
+ for (auto const& it: m_memoryContent)
+ {
+ _out << " ";
+ streamExpressionClass(_out, it.first);
+ _out << ": ";
+ streamExpressionClass(_out, it.second);
+ }
+
+ return _out;
+}
+
+KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool _copyItem)
+{
+ StoreOperation op;
+ if (_item.type() == Tag)
+ {
+ // can be ignored
+ }
+ else if (_item.type() != Operation)
+ {
+ assertThrow(_item.deposit() == 1, InvalidDeposit, "");
+ setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem));
+ }
+ else
+ {
+ Instruction instruction = _item.instruction();
+ InstructionInfo info = instructionInfo(instruction);
+ if (SemanticInformation::isDupInstruction(_item))
+ setStackElement(
+ m_stackHeight + 1,
+ stackElement(
+ m_stackHeight - int(instruction) + int(Instruction::DUP1),
+ _item.getLocation()
+ )
+ );
+ else if (SemanticInformation::isSwapInstruction(_item))
+ swapStackElements(
+ m_stackHeight,
+ m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1),
+ _item.getLocation()
+ );
+ else if (instruction != Instruction::POP)
+ {
+ vector<Id> arguments(info.args);
+ for (int i = 0; i < info.args; ++i)
+ arguments[i] = stackElement(m_stackHeight - i, _item.getLocation());
+
+ if (_item.instruction() == Instruction::SSTORE)
+ op = storeInStorage(arguments[0], arguments[1], _item.getLocation());
+ else if (_item.instruction() == Instruction::SLOAD)
+ setStackElement(
+ m_stackHeight + _item.deposit(),
+ loadFromStorage(arguments[0], _item.getLocation())
+ );
+ else if (_item.instruction() == Instruction::MSTORE)
+ op = storeInMemory(arguments[0], arguments[1], _item.getLocation());
+ else if (_item.instruction() == Instruction::MLOAD)
+ setStackElement(
+ m_stackHeight + _item.deposit(),
+ loadFromMemory(arguments[0], _item.getLocation())
+ );
+ else if (_item.instruction() == Instruction::SHA3)
+ setStackElement(
+ m_stackHeight + _item.deposit(),
+ applySha3(arguments.at(0), arguments.at(1), _item.getLocation())
+ );
+ else
+ {
+ if (SemanticInformation::invalidatesMemory(_item.instruction()))
+ resetMemory();
+ if (SemanticInformation::invalidatesStorage(_item.instruction()))
+ resetStorage();
+ assertThrow(info.ret <= 1, InvalidDeposit, "");
+ if (info.ret == 1)
+ setStackElement(
+ m_stackHeight + _item.deposit(),
+ m_expressionClasses->find(_item, arguments, _copyItem)
+ );
+ }
+ }
+ m_stackElements.erase(
+ m_stackElements.upper_bound(m_stackHeight + _item.deposit()),
+ m_stackElements.end()
+ );
+ m_stackHeight += _item.deposit();
+ }
+ return op;
+}
+
+void KnownState::reduceToCommonKnowledge(KnownState const& /*_other*/)
+{
+ //@todo
+ *this = KnownState(m_expressionClasses);
+}
+
+bool KnownState::operator==(const KnownState& _other) const
+{
+ //@todo
+ return (
+ m_stackElements.empty() &&
+ _other.m_stackElements.empty() &&
+ m_storageContent.empty() &&
+ _other.m_storageContent.empty() &&
+ m_memoryContent.empty() &&
+ _other.m_memoryContent.empty()
+ );
+}
+
+ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location)
+{
+ if (m_stackElements.count(_stackHeight))
+ return m_stackElements.at(_stackHeight);
+ // Stack element not found (not assigned yet), create new unknown equivalence class.
+ //@todo check that we do not infer incorrect equivalences when the stack is cleared partially
+ //in between.
+ return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location);
+}
+
+ExpressionClasses::Id KnownState::initialStackElement(
+ int _stackHeight,
+ SourceLocation const& _location
+)
+{
+ // This is a special assembly item that refers to elements pre-existing on the initial stack.
+ return m_expressionClasses->find(AssemblyItem(UndefinedItem, u256(_stackHeight), _location));
+}
+
+void KnownState::setStackElement(int _stackHeight, Id _class)
+{
+ m_stackElements[_stackHeight] = _class;
+}
+
+void KnownState::swapStackElements(
+ int _stackHeightA,
+ int _stackHeightB,
+ SourceLocation const& _location
+)
+{
+ assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements.");
+ // ensure they are created
+ stackElement(_stackHeightA, _location);
+ stackElement(_stackHeightB, _location);
+
+ swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]);
+}
+
+KnownState::StoreOperation KnownState::storeInStorage(
+ Id _slot,
+ Id _value,
+ SourceLocation const& _location)
+{
+ if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value)
+ // do not execute the storage if we know that the value is already there
+ return StoreOperation();
+ m_sequenceNumber++;
+ decltype(m_storageContent) storageContents;
+ // Copy over all values (i.e. retain knowledge about them) where we know that this store
+ // operation will not destroy the knowledge. Specifically, we copy storage locations we know
+ // are different from _slot or locations where we know that the stored value is equal to _value.
+ for (auto const& storageItem: m_storageContent)
+ if (m_expressionClasses->knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value)
+ storageContents.insert(storageItem);
+ m_storageContent = move(storageContents);
+
+ AssemblyItem item(Instruction::SSTORE, _location);
+ Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber);
+ StoreOperation operation(StoreOperation::Storage, _slot, m_sequenceNumber, id);
+ m_storageContent[_slot] = _value;
+ // increment a second time so that we get unique sequence numbers for writes
+ m_sequenceNumber++;
+
+ return operation;
+}
+
+ExpressionClasses::Id KnownState::loadFromStorage(Id _slot, SourceLocation const& _location)
+{
+ if (m_storageContent.count(_slot))
+ return m_storageContent.at(_slot);
+
+ AssemblyItem item(Instruction::SLOAD, _location);
+ return m_storageContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber);
+}
+
+KnownState::StoreOperation KnownState::storeInMemory(Id _slot, Id _value, SourceLocation const& _location)
+{
+ if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value)
+ // do not execute the store if we know that the value is already there
+ return StoreOperation();
+ m_sequenceNumber++;
+ decltype(m_memoryContent) memoryContents;
+ // copy over values at points where we know that they are different from _slot by at least 32
+ for (auto const& memoryItem: m_memoryContent)
+ if (m_expressionClasses->knownToBeDifferentBy32(memoryItem.first, _slot))
+ memoryContents.insert(memoryItem);
+ m_memoryContent = move(memoryContents);
+
+ AssemblyItem item(Instruction::MSTORE, _location);
+ Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber);
+ StoreOperation operation(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id));
+ m_memoryContent[_slot] = _value;
+ // increment a second time so that we get unique sequence numbers for writes
+ m_sequenceNumber++;
+ return operation;
+}
+
+ExpressionClasses::Id KnownState::loadFromMemory(Id _slot, SourceLocation const& _location)
+{
+ if (m_memoryContent.count(_slot))
+ return m_memoryContent.at(_slot);
+
+ AssemblyItem item(Instruction::MLOAD, _location);
+ return m_memoryContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber);
+}
+
+KnownState::Id KnownState::applySha3(
+ Id _start,
+ Id _length,
+ SourceLocation const& _location
+)
+{
+ AssemblyItem sha3Item(Instruction::SHA3, _location);
+ // Special logic if length is a short constant, otherwise we cannot tell.
+ u256 const* l = m_expressionClasses->knownConstant(_length);
+ // unknown or too large length
+ if (!l || *l > 128)
+ return m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber);
+
+ vector<Id> arguments;
+ for (u256 i = 0; i < *l; i += 32)
+ {
+ Id slot = m_expressionClasses->find(
+ AssemblyItem(Instruction::ADD, _location),
+ {_start, m_expressionClasses->find(i)}
+ );
+ arguments.push_back(loadFromMemory(slot, _location));
+ }
+ if (m_knownSha3Hashes.count(arguments))
+ return m_knownSha3Hashes.at(arguments);
+ Id v;
+ // If all arguments are known constants, compute the sha3 here
+ if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses->knownConstant(_a); }))
+ {
+ bytes data;
+ for (Id a: arguments)
+ data += toBigEndian(*m_expressionClasses->knownConstant(a));
+ data.resize(size_t(*l));
+ v = m_expressionClasses->find(AssemblyItem(u256(sha3(data)), _location));
+ }
+ else
+ v = m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber);
+ return m_knownSha3Hashes[arguments] = v;
+}
+
diff --git a/KnownState.h b/KnownState.h
new file mode 100644
index 00000000..f7a3dd67
--- /dev/null
+++ b/KnownState.h
@@ -0,0 +1,163 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file KnownState.h
+ * @author Christian <c@ethdev.com>
+ * @date 2015
+ * Contains knowledge about the state of the virtual machine at a specific instruction.
+ */
+
+#pragma once
+
+#include <vector>
+#include <map>
+#include <set>
+#include <tuple>
+#include <memory>
+#include <ostream>
+#include <libdevcore/CommonIO.h>
+#include <libdevcore/Exceptions.h>
+#include <libevmasm/ExpressionClasses.h>
+#include <libevmasm/SemanticInformation.h>
+
+namespace dev
+{
+namespace eth
+{
+
+class AssemblyItem;
+using AssemblyItems = std::vector<AssemblyItem>;
+
+/**
+ * Class to infer and store knowledge about the state of the virtual machine at a specific
+ * instruction.
+ *
+ * The general workings are that for each assembly item that is fed, an equivalence class is
+ * derived from the operation and the equivalence class of its arguments. DUPi, SWAPi and some
+ * arithmetic instructions are used to infer equivalences while these classes are determined.
+ */
+class KnownState
+{
+public:
+ using Id = ExpressionClasses::Id;
+ struct StoreOperation
+ {
+ enum Target { Invalid, Memory, Storage };
+ StoreOperation(): target(Invalid), sequenceNumber(-1) {}
+ StoreOperation(
+ Target _target,
+ Id _slot,
+ unsigned _sequenceNumber,
+ Id _expression
+ ): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {}
+ bool isValid() const { return target != Invalid; }
+ Target target;
+ Id slot;
+ unsigned sequenceNumber;
+ Id expression;
+ };
+
+ explicit KnownState(
+ std::shared_ptr<ExpressionClasses> _expressionClasses = std::make_shared<ExpressionClasses>()
+ ): m_expressionClasses(_expressionClasses)
+ {
+ }
+
+ /// Streams debugging information to @a _out.
+ std::ostream& stream(std::ostream& _out) const;
+
+ /// Feeds the item into the system for analysis.
+ /// @returns a possible store operation
+ StoreOperation feedItem(AssemblyItem const& _item, bool _copyItem = false);
+
+ /// Resets any knowledge about storage.
+ void resetStorage() { m_storageContent.clear(); }
+ /// Resets any knowledge about storage.
+ void resetMemory() { m_memoryContent.clear(); }
+ /// Resets any knowledge about the current stack.
+ void resetStack() { m_stackElements.clear(); m_stackHeight = 0; }
+ /// Resets any knowledge.
+ void reset() { resetStorage(); resetMemory(); resetStack(); }
+
+ /// Manually increments the storage and memory sequence number.
+ void incrementSequenceNumber() { m_sequenceNumber += 2; }
+
+ /// Replaces the state by the intersection with _other, i.e. only equal knowledge is retained.
+ /// If the stack heighht is different, the smaller one is used and the stack is compared
+ /// relatively.
+ void reduceToCommonKnowledge(KnownState const& _other);
+
+ /// @returns a shared pointer to a copy of this state.
+ std::shared_ptr<KnownState> copy() const { return std::make_shared<KnownState>(*this); }
+
+ /// @returns true if the knowledge about the state of both objects is (known to be) equal.
+ bool operator==(KnownState const& _other) const;
+
+ ///@todo the sequence numbers in two copies of this class should never be the same.
+ /// might be doable using two-dimensional sequence numbers, where the first value is incremented
+ /// for each copy
+
+ /// Retrieves the current equivalence class fo the given stack element (or generates a new
+ /// one if it does not exist yet).
+ Id stackElement(int _stackHeight, SourceLocation const& _location);
+ /// @returns the equivalence class id of the special initial stack element at the given height.
+ Id initialStackElement(int _stackHeight, SourceLocation const& _location);
+
+ int stackHeight() const { return m_stackHeight; }
+ std::map<int, Id> const& stackElements() const { return m_stackElements; }
+ ExpressionClasses& expressionClasses() const { return *m_expressionClasses; }
+
+private:
+ /// Assigns a new equivalence class to the next sequence number of the given stack element.
+ void setStackElement(int _stackHeight, Id _class);
+ /// Swaps the given stack elements in their next sequence number.
+ void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location);
+
+ /// Increments the sequence number, deletes all storage information that might be overwritten
+ /// and stores the new value at the given slot.
+ /// @returns the store operation, which might be invalid if storage was not modified
+ StoreOperation storeInStorage(Id _slot, Id _value, SourceLocation const& _location);
+ /// Retrieves the current value at the given slot in storage or creates a new special sload class.
+ Id loadFromStorage(Id _slot, SourceLocation const& _location);
+ /// Increments the sequence number, deletes all memory information that might be overwritten
+ /// and stores the new value at the given slot.
+ /// @returns the store operation, which might be invalid if memory was not modified
+ StoreOperation storeInMemory(Id _slot, Id _value, SourceLocation const& _location);
+ /// Retrieves the current value at the given slot in memory or creates a new special mload class.
+ Id loadFromMemory(Id _slot, SourceLocation const& _location);
+ /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory.
+ Id applySha3(Id _start, Id _length, SourceLocation const& _location);
+
+ /// Current stack height, can be negative.
+ int m_stackHeight = 0;
+ /// Current stack layout, mapping stack height -> equivalence class
+ std::map<int, Id> m_stackElements;
+ /// Current sequence number, this is incremented with each modification to storage or memory.
+ unsigned m_sequenceNumber = 1;
+ /// Knowledge about storage content.
+ std::map<Id, Id> m_storageContent;
+ /// Knowledge about memory content. Keys are memory addresses, note that the values overlap
+ /// and are not contained here if they are not completely known.
+ std::map<Id, Id> m_memoryContent;
+ /// Keeps record of all sha3 hashes that are computed.
+ std::map<std::vector<Id>, Id> m_knownSha3Hashes;
+ /// Structure containing the classes of equivalent expressions.
+ std::shared_ptr<ExpressionClasses> m_expressionClasses;
+};
+
+}
+}
diff --git a/SemanticInformation.cpp b/SemanticInformation.cpp
index 83d59efc..056162b5 100644
--- a/SemanticInformation.cpp
+++ b/SemanticInformation.cpp
@@ -122,3 +122,56 @@ bool SemanticInformation::altersControlFlow(AssemblyItem const& _item)
return false;
}
}
+
+
+bool SemanticInformation::isDeterministic(AssemblyItem const& _item)
+{
+ if (_item.type() != Operation)
+ return true;
+
+ switch (_item.instruction())
+ {
+ case Instruction::CALL:
+ case Instruction::CALLCODE:
+ case Instruction::CREATE:
+ case Instruction::GAS:
+ case Instruction::PC:
+ case Instruction::MSIZE: // depends on previous writes and reads, not only on content
+ case Instruction::BALANCE: // depends on previous calls
+ case Instruction::EXTCODESIZE:
+ return false;
+ default:
+ return true;
+ }
+}
+
+bool SemanticInformation::invalidatesMemory(Instruction _instruction)
+{
+ switch (_instruction)
+ {
+ case Instruction::CALLDATACOPY:
+ case Instruction::CODECOPY:
+ case Instruction::EXTCODECOPY:
+ case Instruction::MSTORE:
+ case Instruction::MSTORE8:
+ case Instruction::CALL:
+ case Instruction::CALLCODE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool SemanticInformation::invalidatesStorage(Instruction _instruction)
+{
+ switch (_instruction)
+ {
+ case Instruction::CALL:
+ case Instruction::CALLCODE:
+ case Instruction::CREATE:
+ case Instruction::SSTORE:
+ return true;
+ default:
+ return false;
+ }
+}
diff --git a/SemanticInformation.h b/SemanticInformation.h
index 27aa6f1a..094f4591 100644
--- a/SemanticInformation.h
+++ b/SemanticInformation.h
@@ -23,6 +23,7 @@
#pragma once
+#include <libevmcore/Instruction.h>
namespace dev
{
@@ -45,6 +46,13 @@ struct SemanticInformation
static bool isSwapInstruction(AssemblyItem const& _item);
static bool isJumpInstruction(AssemblyItem const& _item);
static bool altersControlFlow(AssemblyItem const& _item);
+ /// @returns false if the value put on the stack by _item depends on anything else than
+ /// the information in the current block header, memory, storage or stack.
+ static bool isDeterministic(AssemblyItem const& _item);
+ /// @returns true if the given instruction modifies memory.
+ static bool invalidatesMemory(Instruction _instruction);
+ /// @returns true if the given instruction modifies storage (even indirectly).
+ static bool invalidatesStorage(Instruction _instruction);
};
}