aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Assembly.cpp3
-rw-r--r--CommonSubexpressionEliminator.cpp267
-rw-r--r--CommonSubexpressionEliminator.h59
-rw-r--r--KnownState.cpp278
-rw-r--r--KnownState.h149
5 files changed, 451 insertions, 305 deletions
diff --git a/Assembly.cpp b/Assembly.cpp
index 6cc09a4b..c7253622 100644
--- a/Assembly.cpp
+++ b/Assembly.cpp
@@ -329,7 +329,8 @@ Assembly& Assembly::optimise(bool _enable)
copt << "Performing common subexpression elimination...";
for (auto iter = m_items.begin(); iter != m_items.end();)
{
- CommonSubexpressionEliminator eliminator;
+ KnownState state;
+ CommonSubexpressionEliminator eliminator(state);
auto orig = iter;
iter = eliminator.feedItems(iter, m_items.end());
AssemblyItems optItems;
diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp
index 63524d6f..645a426d 100644
--- a/CommonSubexpressionEliminator.cpp
+++ b/CommonSubexpressionEliminator.cpp
@@ -37,18 +37,19 @@ vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems()
map<int, Id> initialStackContents;
map<int, Id> targetStackContents;
- int minHeight = m_stackHeight + 1;
- if (!m_stackElements.empty())
- minHeight = min(minHeight, m_stackElements.begin()->first);
+ int minHeight = m_state.stackHeight() + 1;
+ if (!m_state.stackElements().empty())
+ minHeight = min(minHeight, m_state.stackElements().begin()->first);
for (int height = minHeight; height <= 0; ++height)
- initialStackContents[height] = initialStackElement(height, SourceLocation());
- for (int height = minHeight; height <= m_stackHeight; ++height)
- targetStackContents[height] = stackElement(height, SourceLocation());
+ //@todo this is not nice as it is here - should be "unknownStackElement" - but is it really unknown?
+ initialStackContents[height] = m_state.initialStackElement(height, SourceLocation());
+ for (int height = minHeight; height <= m_state.stackHeight(); ++height)
+ targetStackContents[height] = m_state.stackElement(height, SourceLocation());
// Debug info:
//stream(cout, initialStackContents, targetStackContents);
- AssemblyItems items = CSECodeGenerator(m_expressionClasses, m_storeOperations).generateCode(
+ AssemblyItems items = CSECodeGenerator(m_state.expressionClasses(), m_storeOperations).generateCode(
initialStackContents,
targetStackContents
);
@@ -57,103 +58,11 @@ vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems()
return items;
}
-ostream& CommonSubexpressionEliminator::stream(
- ostream& _out,
- map<int, Id> _initialStack,
- map<int, Id> _targetStack
-) const
-{
- auto streamExpressionClass = [this](ostream& _out, Id _id)
- {
- auto const& expr = m_expressionClasses.representative(_id);
- _out << " " << dec << _id << ": " << *expr.item;
- if (expr.sequenceNumber)
- _out << "@" << dec << expr.sequenceNumber;
- _out << "(";
- for (Id arg: expr.arguments)
- _out << dec << arg << ",";
- _out << ")" << endl;
- };
-
- _out << "Optimizer analysis:" << endl;
- _out << "Final stack height: " << dec << m_stackHeight << endl;
- _out << "Equivalence classes: " << endl;
- for (Id eqClass = 0; eqClass < m_expressionClasses.size(); ++eqClass)
- streamExpressionClass(_out, eqClass);
-
- _out << "Initial stack: " << endl;
- for (auto const& it: _initialStack)
- {
- _out << " " << dec << it.first << ": ";
- streamExpressionClass(_out, it.second);
- }
- _out << "Target stack: " << endl;
- for (auto const& it: _targetStack)
- {
- _out << " " << dec << it.first << ": ";
- streamExpressionClass(_out, it.second);
- }
-
- return _out;
-}
-
void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _copyItem)
{
- if (_item.type() != Operation)
- {
- assertThrow(_item.deposit() == 1, InvalidDeposit, "");
- setStackElement(++m_stackHeight, m_expressionClasses.find(_item, {}, _copyItem));
- }
- else
- {
- Instruction instruction = _item.instruction();
- InstructionInfo info = instructionInfo(instruction);
- if (SemanticInformation::isDupInstruction(_item))
- setStackElement(
- m_stackHeight + 1,
- stackElement(
- m_stackHeight - int(instruction) + int(Instruction::DUP1),
- _item.getLocation()
- )
- );
- else if (SemanticInformation::isSwapInstruction(_item))
- swapStackElements(
- m_stackHeight,
- m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1),
- _item.getLocation()
- );
- else if (instruction != Instruction::POP)
- {
- vector<Id> arguments(info.args);
- for (int i = 0; i < info.args; ++i)
- arguments[i] = stackElement(m_stackHeight - i, _item.getLocation());
- if (_item.instruction() == Instruction::SSTORE)
- storeInStorage(arguments[0], arguments[1], _item.getLocation());
- else if (_item.instruction() == Instruction::SLOAD)
- setStackElement(
- m_stackHeight + _item.deposit(),
- loadFromStorage(arguments[0], _item.getLocation())
- );
- else if (_item.instruction() == Instruction::MSTORE)
- storeInMemory(arguments[0], arguments[1], _item.getLocation());
- else if (_item.instruction() == Instruction::MLOAD)
- setStackElement(
- m_stackHeight + _item.deposit(),
- loadFromMemory(arguments[0], _item.getLocation())
- );
- else if (_item.instruction() == Instruction::SHA3)
- setStackElement(
- m_stackHeight + _item.deposit(),
- applySha3(arguments.at(0), arguments.at(1), _item.getLocation())
- );
- else
- setStackElement(
- m_stackHeight + _item.deposit(),
- m_expressionClasses.find(_item, arguments, _copyItem)
- );
- }
- m_stackHeight += _item.deposit();
- }
+ StoreOperation op = m_state.feedItem(_item, _copyItem);
+ if (op.isValid())
+ m_storeOperations.push_back(op);
}
void CommonSubexpressionEliminator::optimizeBreakingItem()
@@ -164,20 +73,20 @@ void CommonSubexpressionEliminator::optimizeBreakingItem()
SourceLocation const& location = m_breakingItem->getLocation();
AssemblyItem::JumpType jumpType = m_breakingItem->getJumpType();
- Id condition = stackElement(m_stackHeight - 1, location);
- Id zero = m_expressionClasses.find(u256(0));
- if (m_expressionClasses.knownToBeDifferent(condition, zero))
+ Id condition = m_state.stackElement(m_state.stackHeight() - 1, location);
+ Id zero = m_state.expressionClasses().find(u256(0));
+ if (m_state.expressionClasses().knownToBeDifferent(condition, zero))
{
feedItem(AssemblyItem(Instruction::SWAP1, location), true);
feedItem(AssemblyItem(Instruction::POP, location), true);
AssemblyItem item(Instruction::JUMP, location);
item.setJumpType(jumpType);
- m_breakingItem = m_expressionClasses.storeItem(item);
+ m_breakingItem = m_state.expressionClasses().storeItem(item);
return;
}
- Id negatedCondition = m_expressionClasses.find(Instruction::ISZERO, {condition});
- if (m_expressionClasses.knownToBeDifferent(negatedCondition, zero))
+ Id negatedCondition = m_state.expressionClasses().find(Instruction::ISZERO, {condition});
+ if (m_state.expressionClasses().knownToBeDifferent(negatedCondition, zero))
{
AssemblyItem it(Instruction::POP, location);
feedItem(it, true);
@@ -186,148 +95,6 @@ void CommonSubexpressionEliminator::optimizeBreakingItem()
}
}
-void CommonSubexpressionEliminator::setStackElement(int _stackHeight, Id _class)
-{
- m_stackElements[_stackHeight] = _class;
-}
-
-void CommonSubexpressionEliminator::swapStackElements(
- int _stackHeightA,
- int _stackHeightB,
- SourceLocation const& _location
-)
-{
- assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements.");
- // ensure they are created
- stackElement(_stackHeightA, _location);
- stackElement(_stackHeightB, _location);
-
- swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]);
-}
-
-ExpressionClasses::Id CommonSubexpressionEliminator::stackElement(
- int _stackHeight,
- SourceLocation const& _location
-)
-{
- if (m_stackElements.count(_stackHeight))
- return m_stackElements.at(_stackHeight);
- // Stack element not found (not assigned yet), create new equivalence class.
- return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location);
-}
-
-ExpressionClasses::Id CommonSubexpressionEliminator::initialStackElement(
- int _stackHeight,
- SourceLocation const& _location
-)
-{
- assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested.");
- assertThrow(_stackHeight > -16, StackTooDeepException, "");
- // This is a special assembly item that refers to elements pre-existing on the initial stack.
- return m_expressionClasses.find(AssemblyItem(dupInstruction(1 - _stackHeight), _location));
-}
-
-void CommonSubexpressionEliminator::storeInStorage(Id _slot, Id _value, SourceLocation const& _location)
-{
- if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value)
- // do not execute the storage if we know that the value is already there
- return;
- m_sequenceNumber++;
- decltype(m_storageContent) storageContents;
- // Copy over all values (i.e. retain knowledge about them) where we know that this store
- // operation will not destroy the knowledge. Specifically, we copy storage locations we know
- // are different from _slot or locations where we know that the stored value is equal to _value.
- for (auto const& storageItem: m_storageContent)
- if (m_expressionClasses.knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value)
- storageContents.insert(storageItem);
- m_storageContent = move(storageContents);
-
- AssemblyItem item(Instruction::SSTORE, _location);
- Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber);
- m_storeOperations.push_back(StoreOperation(StoreOperation::Storage, _slot, m_sequenceNumber, id));
- m_storageContent[_slot] = _value;
- // increment a second time so that we get unique sequence numbers for writes
- m_sequenceNumber++;
-}
-
-ExpressionClasses::Id CommonSubexpressionEliminator::loadFromStorage(Id _slot, SourceLocation const& _location)
-{
- if (m_storageContent.count(_slot))
- return m_storageContent.at(_slot);
-
- AssemblyItem item(Instruction::SLOAD, _location);
- return m_storageContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber);
-}
-
-void CommonSubexpressionEliminator::storeInMemory(Id _slot, Id _value, SourceLocation const& _location)
-{
- if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value)
- // do not execute the store if we know that the value is already there
- return;
- m_sequenceNumber++;
- decltype(m_memoryContent) memoryContents;
- // copy over values at points where we know that they are different from _slot by at least 32
- for (auto const& memoryItem: m_memoryContent)
- if (m_expressionClasses.knownToBeDifferentBy32(memoryItem.first, _slot))
- memoryContents.insert(memoryItem);
- m_memoryContent = move(memoryContents);
-
- AssemblyItem item(Instruction::MSTORE, _location);
- Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber);
- m_storeOperations.push_back(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id));
- m_memoryContent[_slot] = _value;
- // increment a second time so that we get unique sequence numbers for writes
- m_sequenceNumber++;
-}
-
-ExpressionClasses::Id CommonSubexpressionEliminator::loadFromMemory(Id _slot, SourceLocation const& _location)
-{
- if (m_memoryContent.count(_slot))
- return m_memoryContent.at(_slot);
-
- AssemblyItem item(Instruction::MLOAD, _location);
- return m_memoryContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber);
-}
-
-CommonSubexpressionEliminator::Id CommonSubexpressionEliminator::applySha3(
- Id _start,
- Id _length,
- SourceLocation const& _location
-)
-{
- AssemblyItem sha3Item(Instruction::SHA3, _location);
- // Special logic if length is a short constant, otherwise we cannot tell.
- u256 const* l = m_expressionClasses.knownConstant(_length);
- // unknown or too large length
- if (!l || *l > 128)
- return m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber);
-
- vector<Id> arguments;
- for (u256 i = 0; i < *l; i += 32)
- {
- Id slot = m_expressionClasses.find(
- AssemblyItem(Instruction::ADD, _location),
- {_start, m_expressionClasses.find(i)}
- );
- arguments.push_back(loadFromMemory(slot, _location));
- }
- if (m_knownSha3Hashes.count(arguments))
- return m_knownSha3Hashes.at(arguments);
- Id v;
- // If all arguments are known constants, compute the sha3 here
- if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses.knownConstant(_a); }))
- {
- bytes data;
- for (Id a: arguments)
- data += toBigEndian(*m_expressionClasses.knownConstant(a));
- data.resize(size_t(*l));
- v = m_expressionClasses.find(AssemblyItem(u256(sha3(data)), _location));
- }
- else
- v = m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber);
- return m_knownSha3Hashes[arguments] = v;
-}
-
CSECodeGenerator::CSECodeGenerator(
ExpressionClasses& _expressionClasses,
vector<CSECodeGenerator::StoreOperation> const& _storeOperations
diff --git a/CommonSubexpressionEliminator.h b/CommonSubexpressionEliminator.h
index 6156bc81..2ed92640 100644
--- a/CommonSubexpressionEliminator.h
+++ b/CommonSubexpressionEliminator.h
@@ -32,6 +32,7 @@
#include <libdevcore/Exceptions.h>
#include <libevmasm/ExpressionClasses.h>
#include <libevmasm/SemanticInformation.h>
+#include <libevmasm/KnownState.h>
namespace dev
{
@@ -58,20 +59,9 @@ class CommonSubexpressionEliminator
{
public:
using Id = ExpressionClasses::Id;
- struct StoreOperation
- {
- enum Target { Memory, Storage };
- StoreOperation(
- Target _target,
- Id _slot,
- unsigned _sequenceNumber,
- Id _expression
- ): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {}
- Target target;
- Id slot;
- unsigned sequenceNumber;
- Id expression;
- };
+ using StoreOperation = KnownState::StoreOperation;
+
+ CommonSubexpressionEliminator(KnownState const& _state): m_state(_state) {}
/// Feeds AssemblyItems into the eliminator and @returns the iterator pointing at the first
/// item that must be fed into a new instance of the eliminator.
@@ -95,49 +85,10 @@ private:
/// Tries to optimize the item that breaks the basic block at the end.
void optimizeBreakingItem();
- /// Simplifies the given item using
- /// Assigns a new equivalence class to the next sequence number of the given stack element.
- void setStackElement(int _stackHeight, Id _class);
- /// Swaps the given stack elements in their next sequence number.
- void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location);
- /// Retrieves the current equivalence class fo the given stack element (or generates a new
- /// one if it does not exist yet).
- Id stackElement(int _stackHeight, SourceLocation const& _location);
- /// @returns the equivalence class id of the special initial stack element at the given height
- /// (must not be positive).
- Id initialStackElement(int _stackHeight, SourceLocation const& _location);
-
- /// Increments the sequence number, deletes all storage information that might be overwritten
- /// and stores the new value at the given slot.
- void storeInStorage(Id _slot, Id _value, SourceLocation const& _location);
- /// Retrieves the current value at the given slot in storage or creates a new special sload class.
- Id loadFromStorage(Id _slot, SourceLocation const& _location);
- /// Increments the sequence number, deletes all memory information that might be overwritten
- /// and stores the new value at the given slot.
- void storeInMemory(Id _slot, Id _value, SourceLocation const& _location);
- /// Retrieves the current value at the given slot in memory or creates a new special mload class.
- Id loadFromMemory(Id _slot, SourceLocation const& _location);
- /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory.
- Id applySha3(Id _start, Id _length, SourceLocation const& _location);
-
- /// Current stack height, can be negative.
- int m_stackHeight = 0;
- /// Current stack layout, mapping stack height -> equivalence class
- std::map<int, Id> m_stackElements;
- /// Current sequence number, this is incremented with each modification to storage or memory.
- unsigned m_sequenceNumber = 1;
- /// Knowledge about storage content.
- std::map<Id, Id> m_storageContent;
- /// Knowledge about memory content. Keys are memory addresses, note that the values overlap
- /// and are not contained here if they are not completely known.
- std::map<Id, Id> m_memoryContent;
- /// Keeps record of all sha3 hashes that are computed.
- std::map<std::vector<Id>, Id> m_knownSha3Hashes;
+ KnownState m_state;
/// Keeps information about which storage or memory slots were written to at which sequence
/// number with what instruction.
std::vector<StoreOperation> m_storeOperations;
- /// Structure containing the classes of equivalent expressions.
- ExpressionClasses m_expressionClasses;
/// The item that breaks the basic block, can be nullptr.
/// It is usually appended to the block but can be optimized in some cases.
diff --git a/KnownState.cpp b/KnownState.cpp
new file mode 100644
index 00000000..244270fb
--- /dev/null
+++ b/KnownState.cpp
@@ -0,0 +1,278 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file KnownState.cpp
+ * @author Christian <c@ethdev.com>
+ * @date 2015
+ * Contains knowledge about the state of the virtual machine at a specific instruction.
+ */
+
+#include "KnownState.h"
+#include <functional>
+#include <libdevcrypto/SHA3.h>
+#include <libevmasm/AssemblyItem.h>
+
+using namespace std;
+using namespace dev;
+using namespace dev::eth;
+
+ostream& KnownState::stream(
+ ostream& _out,
+ map<int, Id> _initialStack,
+ map<int, Id> _targetStack
+) const
+{
+ auto streamExpressionClass = [this](ostream& _out, Id _id)
+ {
+ auto const& expr = m_expressionClasses->representative(_id);
+ _out << " " << dec << _id << ": " << *expr.item;
+ if (expr.sequenceNumber)
+ _out << "@" << dec << expr.sequenceNumber;
+ _out << "(";
+ for (Id arg: expr.arguments)
+ _out << dec << arg << ",";
+ _out << ")" << endl;
+ };
+
+ _out << "Optimizer analysis:" << endl;
+ _out << "Final stack height: " << dec << m_stackHeight << endl;
+ _out << "Equivalence classes: " << endl;
+ for (Id eqClass = 0; eqClass < m_expressionClasses->size(); ++eqClass)
+ streamExpressionClass(_out, eqClass);
+
+ _out << "Initial stack: " << endl;
+ for (auto const& it: _initialStack)
+ {
+ _out << " " << dec << it.first << ": ";
+ streamExpressionClass(_out, it.second);
+ }
+ _out << "Target stack: " << endl;
+ for (auto const& it: _targetStack)
+ {
+ _out << " " << dec << it.first << ": ";
+ streamExpressionClass(_out, it.second);
+ }
+
+ return _out;
+}
+
+KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool _copyItem)
+{
+ StoreOperation op;
+ if (_item.type() != Operation)
+ {
+ assertThrow(_item.deposit() == 1, InvalidDeposit, "");
+ setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem));
+ }
+ else
+ {
+ Instruction instruction = _item.instruction();
+ InstructionInfo info = instructionInfo(instruction);
+ if (SemanticInformation::isDupInstruction(_item))
+ setStackElement(
+ m_stackHeight + 1,
+ stackElement(
+ m_stackHeight - int(instruction) + int(Instruction::DUP1),
+ _item.getLocation()
+ )
+ );
+ else if (SemanticInformation::isSwapInstruction(_item))
+ swapStackElements(
+ m_stackHeight,
+ m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1),
+ _item.getLocation()
+ );
+ else if (instruction != Instruction::POP)
+ {
+ vector<Id> arguments(info.args);
+ for (int i = 0; i < info.args; ++i)
+ arguments[i] = stackElement(m_stackHeight - i, _item.getLocation());
+ if (_item.instruction() == Instruction::SSTORE)
+ op = storeInStorage(arguments[0], arguments[1], _item.getLocation());
+ else if (_item.instruction() == Instruction::SLOAD)
+ setStackElement(
+ m_stackHeight + _item.deposit(),
+ loadFromStorage(arguments[0], _item.getLocation())
+ );
+ else if (_item.instruction() == Instruction::MSTORE)
+ op = storeInMemory(arguments[0], arguments[1], _item.getLocation());
+ else if (_item.instruction() == Instruction::MLOAD)
+ setStackElement(
+ m_stackHeight + _item.deposit(),
+ loadFromMemory(arguments[0], _item.getLocation())
+ );
+ else if (_item.instruction() == Instruction::SHA3)
+ setStackElement(
+ m_stackHeight + _item.deposit(),
+ applySha3(arguments.at(0), arguments.at(1), _item.getLocation())
+ );
+ else
+ setStackElement(
+ m_stackHeight + _item.deposit(),
+ m_expressionClasses->find(_item, arguments, _copyItem)
+ );
+ }
+ m_stackHeight += _item.deposit();
+ }
+ return op;
+}
+
+ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location)
+{
+ if (m_stackElements.count(_stackHeight))
+ return m_stackElements.at(_stackHeight);
+ // Stack element not found (not assigned yet), create new equivalence class.
+ return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location);
+}
+
+ExpressionClasses::Id KnownState::initialStackElement(
+ int _stackHeight,
+ SourceLocation const& _location
+)
+{
+ assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested.");
+ assertThrow(_stackHeight > -16, StackTooDeepException, "");
+ // This is a special assembly item that refers to elements pre-existing on the initial stack.
+ return m_expressionClasses->find(AssemblyItem(dupInstruction(1 - _stackHeight), _location));
+}
+
+void KnownState::setStackElement(int _stackHeight, Id _class)
+{
+ m_stackElements[_stackHeight] = _class;
+}
+
+void KnownState::swapStackElements(
+ int _stackHeightA,
+ int _stackHeightB,
+ SourceLocation const& _location
+)
+{
+ assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements.");
+ // ensure they are created
+ stackElement(_stackHeightA, _location);
+ stackElement(_stackHeightB, _location);
+
+ swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]);
+}
+
+KnownState::StoreOperation KnownState::storeInStorage(
+ Id _slot,
+ Id _value,
+ SourceLocation const& _location)
+{
+ if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value)
+ // do not execute the storage if we know that the value is already there
+ return StoreOperation();
+ m_sequenceNumber++;
+ decltype(m_storageContent) storageContents;
+ // Copy over all values (i.e. retain knowledge about them) where we know that this store
+ // operation will not destroy the knowledge. Specifically, we copy storage locations we know
+ // are different from _slot or locations where we know that the stored value is equal to _value.
+ for (auto const& storageItem: m_storageContent)
+ if (m_expressionClasses->knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value)
+ storageContents.insert(storageItem);
+ m_storageContent = move(storageContents);
+
+ AssemblyItem item(Instruction::SSTORE, _location);
+ Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber);
+ StoreOperation operation(StoreOperation::Storage, _slot, m_sequenceNumber, id);
+ m_storageContent[_slot] = _value;
+ // increment a second time so that we get unique sequence numbers for writes
+ m_sequenceNumber++;
+
+ return operation;
+}
+
+ExpressionClasses::Id KnownState::loadFromStorage(Id _slot, SourceLocation const& _location)
+{
+ if (m_storageContent.count(_slot))
+ return m_storageContent.at(_slot);
+
+ AssemblyItem item(Instruction::SLOAD, _location);
+ return m_storageContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber);
+}
+
+KnownState::StoreOperation KnownState::storeInMemory(Id _slot, Id _value, SourceLocation const& _location)
+{
+ if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value)
+ // do not execute the store if we know that the value is already there
+ return StoreOperation();
+ m_sequenceNumber++;
+ decltype(m_memoryContent) memoryContents;
+ // copy over values at points where we know that they are different from _slot by at least 32
+ for (auto const& memoryItem: m_memoryContent)
+ if (m_expressionClasses->knownToBeDifferentBy32(memoryItem.first, _slot))
+ memoryContents.insert(memoryItem);
+ m_memoryContent = move(memoryContents);
+
+ AssemblyItem item(Instruction::MSTORE, _location);
+ Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber);
+ StoreOperation operation(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id));
+ m_memoryContent[_slot] = _value;
+ // increment a second time so that we get unique sequence numbers for writes
+ m_sequenceNumber++;
+ return operation;
+}
+
+ExpressionClasses::Id KnownState::loadFromMemory(Id _slot, SourceLocation const& _location)
+{
+ if (m_memoryContent.count(_slot))
+ return m_memoryContent.at(_slot);
+
+ AssemblyItem item(Instruction::MLOAD, _location);
+ return m_memoryContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber);
+}
+
+KnownState::Id KnownState::applySha3(
+ Id _start,
+ Id _length,
+ SourceLocation const& _location
+)
+{
+ AssemblyItem sha3Item(Instruction::SHA3, _location);
+ // Special logic if length is a short constant, otherwise we cannot tell.
+ u256 const* l = m_expressionClasses->knownConstant(_length);
+ // unknown or too large length
+ if (!l || *l > 128)
+ return m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber);
+
+ vector<Id> arguments;
+ for (u256 i = 0; i < *l; i += 32)
+ {
+ Id slot = m_expressionClasses->find(
+ AssemblyItem(Instruction::ADD, _location),
+ {_start, m_expressionClasses->find(i)}
+ );
+ arguments.push_back(loadFromMemory(slot, _location));
+ }
+ if (m_knownSha3Hashes.count(arguments))
+ return m_knownSha3Hashes.at(arguments);
+ Id v;
+ // If all arguments are known constants, compute the sha3 here
+ if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses->knownConstant(_a); }))
+ {
+ bytes data;
+ for (Id a: arguments)
+ data += toBigEndian(*m_expressionClasses->knownConstant(a));
+ data.resize(size_t(*l));
+ v = m_expressionClasses->find(AssemblyItem(u256(sha3(data)), _location));
+ }
+ else
+ v = m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber);
+ return m_knownSha3Hashes[arguments] = v;
+}
+
diff --git a/KnownState.h b/KnownState.h
new file mode 100644
index 00000000..c6dfcee6
--- /dev/null
+++ b/KnownState.h
@@ -0,0 +1,149 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file KnownState.h
+ * @author Christian <c@ethdev.com>
+ * @date 2015
+ * Contains knowledge about the state of the virtual machine at a specific instruction.
+ */
+
+#pragma once
+
+#include <vector>
+#include <map>
+#include <set>
+#include <tuple>
+#include <ostream>
+#include <libdevcore/CommonIO.h>
+#include <libdevcore/Exceptions.h>
+#include <libevmasm/ExpressionClasses.h>
+#include <libevmasm/SemanticInformation.h>
+
+namespace dev
+{
+namespace eth
+{
+
+class AssemblyItem;
+using AssemblyItems = std::vector<AssemblyItem>;
+
+/**
+ * Class to infer and store knowledge about the state of the virtual machine at a specific
+ * instruction.
+ *
+ * The general workings are that for each assembly item that is fed, an equivalence class is
+ * derived from the operation and the equivalence class of its arguments. DUPi, SWAPi and some
+ * arithmetic instructions are used to infer equivalences while these classes are determined.
+ */
+class KnownState
+{
+public:
+ using Id = ExpressionClasses::Id;
+ struct StoreOperation
+ {
+ enum Target { Invalid, Memory, Storage };
+ StoreOperation(): target(Invalid), sequenceNumber(-1) {}
+ StoreOperation(
+ Target _target,
+ Id _slot,
+ unsigned _sequenceNumber,
+ Id _expression
+ ): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {}
+ bool isValid() const { return target != Invalid; }
+ Target target;
+ Id slot;
+ unsigned sequenceNumber;
+ Id expression;
+ };
+
+ KnownState(): m_expressionClasses(std::make_shared<ExpressionClasses>()) {}
+
+ /// Streams debugging information to @a _out.
+ std::ostream& stream(
+ std::ostream& _out,
+ std::map<int, Id> _initialStack = std::map<int, Id>(),
+ std::map<int, Id> _targetStack = std::map<int, Id>()
+ ) const;
+
+ /// Feeds the item into the system for analysis.
+ /// @returns a possible store operation
+ StoreOperation feedItem(AssemblyItem const& _item, bool _copyItem = false);
+
+ /// Resets any knowledge about storage.
+ void resetStorage() { m_storageContent.clear(); }
+ /// Resets any knowledge about storage.
+ void resetMemory() { m_memoryContent.clear(); }
+ /// Resets any knowledge about the current stack.
+ void resetStack() { m_stackElements.clear(); m_stackHeight = 0; }
+ /// Resets any knowledge.
+ void reset() { resetStorage(); resetMemory(); resetStack(); }
+
+ ///@todo the sequence numbers in two copies of this class should never be the same.
+ /// might be doable using two-dimensional sequence numbers, where the first value is incremented
+ /// for each copy
+
+ /// Retrieves the current equivalence class fo the given stack element (or generates a new
+ /// one if it does not exist yet).
+ Id stackElement(int _stackHeight, SourceLocation const& _location);
+ /// @returns the equivalence class id of the special initial stack element at the given height
+ /// (must not be positive).
+ Id initialStackElement(int _stackHeight, SourceLocation const& _location);
+
+ int stackHeight() const { return m_stackHeight; }
+ std::map<int, Id> const& stackElements() const { return m_stackElements; }
+ ExpressionClasses& expressionClasses() const { return *m_expressionClasses; }
+
+private:
+ /// Assigns a new equivalence class to the next sequence number of the given stack element.
+ void setStackElement(int _stackHeight, Id _class);
+ /// Swaps the given stack elements in their next sequence number.
+ void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location);
+
+ /// Increments the sequence number, deletes all storage information that might be overwritten
+ /// and stores the new value at the given slot.
+ /// @returns the store operation, which might be invalid if storage was not modified
+ StoreOperation storeInStorage(Id _slot, Id _value, SourceLocation const& _location);
+ /// Retrieves the current value at the given slot in storage or creates a new special sload class.
+ Id loadFromStorage(Id _slot, SourceLocation const& _location);
+ /// Increments the sequence number, deletes all memory information that might be overwritten
+ /// and stores the new value at the given slot.
+ /// @returns the store operation, which might be invalid if memory was not modified
+ StoreOperation storeInMemory(Id _slot, Id _value, SourceLocation const& _location);
+ /// Retrieves the current value at the given slot in memory or creates a new special mload class.
+ Id loadFromMemory(Id _slot, SourceLocation const& _location);
+ /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory.
+ Id applySha3(Id _start, Id _length, SourceLocation const& _location);
+
+ /// Current stack height, can be negative.
+ int m_stackHeight = 0;
+ /// Current stack layout, mapping stack height -> equivalence class
+ std::map<int, Id> m_stackElements;
+ /// Current sequence number, this is incremented with each modification to storage or memory.
+ unsigned m_sequenceNumber = 1;
+ /// Knowledge about storage content.
+ std::map<Id, Id> m_storageContent;
+ /// Knowledge about memory content. Keys are memory addresses, note that the values overlap
+ /// and are not contained here if they are not completely known.
+ std::map<Id, Id> m_memoryContent;
+ /// Keeps record of all sha3 hashes that are computed.
+ std::map<std::vector<Id>, Id> m_knownSha3Hashes;
+ /// Structure containing the classes of equivalent expressions.
+ std::shared_ptr<ExpressionClasses> m_expressionClasses;
+};
+
+}
+}