diff options
Diffstat (limited to 'libevmasm/KnownState.cpp')
-rw-r--r-- | libevmasm/KnownState.cpp | 411 |
1 files changed, 411 insertions, 0 deletions
diff --git a/libevmasm/KnownState.cpp b/libevmasm/KnownState.cpp new file mode 100644 index 00000000..dd269ff4 --- /dev/null +++ b/libevmasm/KnownState.cpp @@ -0,0 +1,411 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file KnownState.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + * Contains knowledge about the state of the virtual machine at a specific instruction. + */ + +#include "KnownState.h" +#include <functional> +#include <libdevcore/SHA3.h> +#include <libevmasm/AssemblyItem.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + +ostream& KnownState::stream(ostream& _out) const +{ + auto streamExpressionClass = [this](ostream& _out, Id _id) + { + auto const& expr = m_expressionClasses->representative(_id); + _out << " " << dec << _id << ": "; + if (!expr.item) + _out << " no item"; + else if (expr.item->type() == UndefinedItem) + _out << " unknown " << int(expr.item->data()); + else + _out << *expr.item; + if (expr.sequenceNumber) + _out << "@" << dec << expr.sequenceNumber; + _out << "("; + for (Id arg: expr.arguments) + _out << dec << arg << ","; + _out << ")" << endl; + }; + + _out << "=== State ===" << endl; + _out << "Stack height: " << dec << m_stackHeight << endl; + _out << "Equivalence classes: " << endl; + for (Id eqClass = 0; eqClass < m_expressionClasses->size(); ++eqClass) + streamExpressionClass(_out, eqClass); + + _out << "Stack: " << endl; + for (auto const& it: m_stackElements) + { + _out << " " << dec << it.first << ": "; + streamExpressionClass(_out, it.second); + } + _out << "Storage: " << endl; + for (auto const& it: m_storageContent) + { + _out << " "; + streamExpressionClass(_out, it.first); + _out << ": "; + streamExpressionClass(_out, it.second); + } + _out << "Memory: " << endl; + for (auto const& it: m_memoryContent) + { + _out << " "; + streamExpressionClass(_out, it.first); + _out << ": "; + streamExpressionClass(_out, it.second); + } + + return _out; +} + +KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool _copyItem) +{ + StoreOperation op; + if (_item.type() == Tag) + { + // can be ignored + } + else if (_item.type() != Operation) + { + assertThrow(_item.deposit() == 1, InvalidDeposit, ""); + if (_item.pushedValue()) + // only available after assembly stage, should not be used for optimisation + setStackElement(++m_stackHeight, m_expressionClasses->find(*_item.pushedValue())); + else + setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem)); + } + else + { + Instruction instruction = _item.instruction(); + InstructionInfo info = instructionInfo(instruction); + if (SemanticInformation::isDupInstruction(_item)) + setStackElement( + m_stackHeight + 1, + stackElement( + m_stackHeight - int(instruction) + int(Instruction::DUP1), + _item.location() + ) + ); + else if (SemanticInformation::isSwapInstruction(_item)) + swapStackElements( + m_stackHeight, + m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1), + _item.location() + ); + else if (instruction != Instruction::POP) + { + vector<Id> arguments(info.args); + for (int i = 0; i < info.args; ++i) + arguments[i] = stackElement(m_stackHeight - i, _item.location()); + + if (_item.instruction() == Instruction::SSTORE) + op = storeInStorage(arguments[0], arguments[1], _item.location()); + else if (_item.instruction() == Instruction::SLOAD) + setStackElement( + m_stackHeight + _item.deposit(), + loadFromStorage(arguments[0], _item.location()) + ); + else if (_item.instruction() == Instruction::MSTORE) + op = storeInMemory(arguments[0], arguments[1], _item.location()); + else if (_item.instruction() == Instruction::MLOAD) + setStackElement( + m_stackHeight + _item.deposit(), + loadFromMemory(arguments[0], _item.location()) + ); + else if (_item.instruction() == Instruction::SHA3) + setStackElement( + m_stackHeight + _item.deposit(), + applySha3(arguments.at(0), arguments.at(1), _item.location()) + ); + else + { + bool invMem = SemanticInformation::invalidatesMemory(_item.instruction()); + bool invStor = SemanticInformation::invalidatesStorage(_item.instruction()); + // We could be a bit more fine-grained here (CALL only invalidates part of + // memory, etc), but we do not for now. + if (invMem) + resetMemory(); + if (invStor) + resetStorage(); + if (invMem || invStor) + m_sequenceNumber += 2; // Increment by two because it can read and write + assertThrow(info.ret <= 1, InvalidDeposit, ""); + if (info.ret == 1) + setStackElement( + m_stackHeight + _item.deposit(), + m_expressionClasses->find(_item, arguments, _copyItem) + ); + } + } + m_stackElements.erase( + m_stackElements.upper_bound(m_stackHeight + _item.deposit()), + m_stackElements.end() + ); + m_stackHeight += _item.deposit(); + } + return op; +} + +/// Helper function for KnownState::reduceToCommonKnowledge, removes everything from +/// _this which is not in or not equal to the value in _other. +template <class _Mapping> void intersect(_Mapping& _this, _Mapping const& _other) +{ + for (auto it = _this.begin(); it != _this.end();) + if (_other.count(it->first) && _other.at(it->first) == it->second) + ++it; + else + it = _this.erase(it); +} + +void KnownState::reduceToCommonKnowledge(KnownState const& _other, bool _combineSequenceNumbers) +{ + int stackDiff = m_stackHeight - _other.m_stackHeight; + for (auto it = m_stackElements.begin(); it != m_stackElements.end();) + if (_other.m_stackElements.count(it->first - stackDiff)) + { + Id other = _other.m_stackElements.at(it->first - stackDiff); + if (it->second == other) + ++it; + else + { + set<u256> theseTags = tagsInExpression(it->second); + set<u256> otherTags = tagsInExpression(other); + if (!theseTags.empty() && !otherTags.empty()) + { + theseTags.insert(otherTags.begin(), otherTags.end()); + it->second = tagUnion(theseTags); + ++it; + } + else + it = m_stackElements.erase(it); + } + } + else + it = m_stackElements.erase(it); + + // Use the smaller stack height. Essential to terminate in case of loops. + if (m_stackHeight > _other.m_stackHeight) + { + map<int, Id> shiftedStack; + for (auto const& stackElement: m_stackElements) + shiftedStack[stackElement.first - stackDiff] = stackElement.second; + m_stackElements = move(shiftedStack); + m_stackHeight = _other.m_stackHeight; + } + + intersect(m_storageContent, _other.m_storageContent); + intersect(m_memoryContent, _other.m_memoryContent); + if (_combineSequenceNumbers) + m_sequenceNumber = max(m_sequenceNumber, _other.m_sequenceNumber); +} + +bool KnownState::operator==(KnownState const& _other) const +{ + if (m_storageContent != _other.m_storageContent || m_memoryContent != _other.m_memoryContent) + return false; + int stackDiff = m_stackHeight - _other.m_stackHeight; + auto thisIt = m_stackElements.cbegin(); + auto otherIt = _other.m_stackElements.cbegin(); + for (; thisIt != m_stackElements.cend() && otherIt != _other.m_stackElements.cend(); ++thisIt, ++otherIt) + if (thisIt->first - stackDiff != otherIt->first || thisIt->second != otherIt->second) + return false; + return (thisIt == m_stackElements.cend() && otherIt == _other.m_stackElements.cend()); +} + +ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location) +{ + if (m_stackElements.count(_stackHeight)) + return m_stackElements.at(_stackHeight); + // Stack element not found (not assigned yet), create new unknown equivalence class. + return m_stackElements[_stackHeight] = + m_expressionClasses->find(AssemblyItem(UndefinedItem, _stackHeight, _location)); +} + +KnownState::Id KnownState::relativeStackElement(int _stackOffset, SourceLocation const& _location) +{ + return stackElement(m_stackHeight + _stackOffset, _location); +} + +void KnownState::clearTagUnions() +{ + for (auto it = m_stackElements.begin(); it != m_stackElements.end();) + if (m_tagUnions.left.count(it->second)) + it = m_stackElements.erase(it); + else + ++it; +} + +void KnownState::setStackElement(int _stackHeight, Id _class) +{ + m_stackElements[_stackHeight] = _class; +} + +void KnownState::swapStackElements( + int _stackHeightA, + int _stackHeightB, + SourceLocation const& _location +) +{ + assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements."); + // ensure they are created + stackElement(_stackHeightA, _location); + stackElement(_stackHeightB, _location); + + swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]); +} + +KnownState::StoreOperation KnownState::storeInStorage( + Id _slot, + Id _value, + SourceLocation const& _location) +{ + if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value) + // do not execute the storage if we know that the value is already there + return StoreOperation(); + m_sequenceNumber++; + decltype(m_storageContent) storageContents; + // Copy over all values (i.e. retain knowledge about them) where we know that this store + // operation will not destroy the knowledge. Specifically, we copy storage locations we know + // are different from _slot or locations where we know that the stored value is equal to _value. + for (auto const& storageItem: m_storageContent) + if (m_expressionClasses->knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value) + storageContents.insert(storageItem); + m_storageContent = move(storageContents); + + AssemblyItem item(Instruction::SSTORE, _location); + Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber); + StoreOperation operation(StoreOperation::Storage, _slot, m_sequenceNumber, id); + m_storageContent[_slot] = _value; + // increment a second time so that we get unique sequence numbers for writes + m_sequenceNumber++; + + return operation; +} + +ExpressionClasses::Id KnownState::loadFromStorage(Id _slot, SourceLocation const& _location) +{ + if (m_storageContent.count(_slot)) + return m_storageContent.at(_slot); + + AssemblyItem item(Instruction::SLOAD, _location); + return m_storageContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber); +} + +KnownState::StoreOperation KnownState::storeInMemory(Id _slot, Id _value, SourceLocation const& _location) +{ + if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value) + // do not execute the store if we know that the value is already there + return StoreOperation(); + m_sequenceNumber++; + decltype(m_memoryContent) memoryContents; + // copy over values at points where we know that they are different from _slot by at least 32 + for (auto const& memoryItem: m_memoryContent) + if (m_expressionClasses->knownToBeDifferentBy32(memoryItem.first, _slot)) + memoryContents.insert(memoryItem); + m_memoryContent = move(memoryContents); + + AssemblyItem item(Instruction::MSTORE, _location); + Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber); + StoreOperation operation(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id)); + m_memoryContent[_slot] = _value; + // increment a second time so that we get unique sequence numbers for writes + m_sequenceNumber++; + return operation; +} + +ExpressionClasses::Id KnownState::loadFromMemory(Id _slot, SourceLocation const& _location) +{ + if (m_memoryContent.count(_slot)) + return m_memoryContent.at(_slot); + + AssemblyItem item(Instruction::MLOAD, _location); + return m_memoryContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber); +} + +KnownState::Id KnownState::applySha3( + Id _start, + Id _length, + SourceLocation const& _location +) +{ + AssemblyItem sha3Item(Instruction::SHA3, _location); + // Special logic if length is a short constant, otherwise we cannot tell. + u256 const* l = m_expressionClasses->knownConstant(_length); + // unknown or too large length + if (!l || *l > 128) + return m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber); + + vector<Id> arguments; + for (u256 i = 0; i < *l; i += 32) + { + Id slot = m_expressionClasses->find( + AssemblyItem(Instruction::ADD, _location), + {_start, m_expressionClasses->find(i)} + ); + arguments.push_back(loadFromMemory(slot, _location)); + } + if (m_knownSha3Hashes.count(arguments)) + return m_knownSha3Hashes.at(arguments); + Id v; + // If all arguments are known constants, compute the sha3 here + if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses->knownConstant(_a); })) + { + bytes data; + for (Id a: arguments) + data += toBigEndian(*m_expressionClasses->knownConstant(a)); + data.resize(size_t(*l)); + v = m_expressionClasses->find(AssemblyItem(u256(sha3(data)), _location)); + } + else + v = m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber); + return m_knownSha3Hashes[arguments] = v; +} + +set<u256> KnownState::tagsInExpression(KnownState::Id _expressionId) +{ + if (m_tagUnions.left.count(_expressionId)) + return m_tagUnions.left.at(_expressionId); + // Might be a tag, then return the set of itself. + ExpressionClasses::Expression expr = m_expressionClasses->representative(_expressionId); + if (expr.item && expr.item->type() == PushTag) + return set<u256>({expr.item->data()}); + else + return set<u256>(); +} + +KnownState::Id KnownState::tagUnion(set<u256> _tags) +{ + if (m_tagUnions.right.count(_tags)) + return m_tagUnions.right.at(_tags); + else + { + Id id = m_expressionClasses->newClass(SourceLocation()); + m_tagUnions.right.insert(make_pair(_tags, id)); + return id; + } +} + |