diff options
author | chriseth <c@ethdev.com> | 2015-05-09 00:58:42 +0800 |
---|---|---|
committer | chriseth <c@ethdev.com> | 2015-05-09 00:58:42 +0800 |
commit | 6cc71a188f3c59b32ac1f131ee74c703f1f81a70 (patch) | |
tree | f5f228b61fd53b6072db548c705463d05e26e2be | |
parent | 4d62c463d143c93f7938db5b8f7d01d33aa1a698 (diff) | |
parent | 1dfcb4735011dfaa143d6592713ec6b4bf097934 (diff) | |
download | dexon-solidity-6cc71a188f3c59b32ac1f131ee74c703f1f81a70.tar.gz dexon-solidity-6cc71a188f3c59b32ac1f131ee74c703f1f81a70.tar.zst dexon-solidity-6cc71a188f3c59b32ac1f131ee74c703f1f81a70.zip |
Merge pull request #1813 from chriseth/sol_knowledgeEngine
Static Analysis Engine.
-rw-r--r-- | Assembly.cpp | 16 | ||||
-rw-r--r-- | CommonSubexpressionEliminator.cpp | 271 | ||||
-rw-r--r-- | CommonSubexpressionEliminator.h | 63 | ||||
-rw-r--r-- | ControlFlowGraph.cpp | 109 | ||||
-rw-r--r-- | ControlFlowGraph.h | 32 | ||||
-rw-r--r-- | ExpressionClasses.cpp | 44 | ||||
-rw-r--r-- | ExpressionClasses.h | 4 | ||||
-rw-r--r-- | KnownState.cpp | 326 | ||||
-rw-r--r-- | KnownState.h | 163 | ||||
-rw-r--r-- | SemanticInformation.cpp | 53 | ||||
-rw-r--r-- | SemanticInformation.h | 8 |
11 files changed, 742 insertions, 347 deletions
diff --git a/Assembly.cpp b/Assembly.cpp index 6cc09a4b..9530ded4 100644 --- a/Assembly.cpp +++ b/Assembly.cpp @@ -304,9 +304,6 @@ Assembly& Assembly::optimise(bool _enable) { if (!_enable) return *this; - std::vector<pair<AssemblyItems, function<AssemblyItems(AssemblyItemsConstRef)>>> rules; - // jump to next instruction - rules.push_back({ { PushTag, Instruction::JUMP, Tag }, [](AssemblyItemsConstRef m) -> AssemblyItems { if (m[0].data() == m[2].data()) return {m[2]}; else return m.toVector(); }}); unsigned total = 0; for (unsigned count = 1; count > 0; total += count) @@ -314,10 +311,17 @@ Assembly& Assembly::optimise(bool _enable) copt << toString(*this); count = 0; + //@todo CFG interface should be a generator, that returns an item and a pointer to a + // knownstate, which has to replace the current state if it is not null. + // Feed these items to the CSE, but also store them and replace the stored version + // if the items generated by the CSE are shorter. (or even use less gas?) copt << "Performing control flow analysis..."; { ControlFlowGraph cfg(m_items); - AssemblyItems optItems = cfg.optimisedItems(); + AssemblyItems optItems; + for (BasicBlock const& block: cfg.optimisedBlocks()) + copy(m_items.begin() + block.begin, m_items.begin() + block.end, + back_inserter(optItems)); if (optItems.size() < m_items.size()) { copt << "Old size: " << m_items.size() << ", new size: " << optItems.size(); @@ -329,7 +333,9 @@ Assembly& Assembly::optimise(bool _enable) copt << "Performing common subexpression elimination..."; for (auto iter = m_items.begin(); iter != m_items.end();) { - CommonSubexpressionEliminator eliminator; + //@todo use only a single state / expression classes instance. + KnownState state(make_shared<ExpressionClasses>()); + CommonSubexpressionEliminator eliminator(state); auto orig = iter; iter = eliminator.feedItems(iter, m_items.end()); AssemblyItems optItems; diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp index 63524d6f..4b85eba4 100644 --- a/CommonSubexpressionEliminator.cpp +++ b/CommonSubexpressionEliminator.cpp @@ -37,18 +37,19 @@ vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems() map<int, Id> initialStackContents; map<int, Id> targetStackContents; - int minHeight = m_stackHeight + 1; - if (!m_stackElements.empty()) - minHeight = min(minHeight, m_stackElements.begin()->first); - for (int height = minHeight; height <= 0; ++height) - initialStackContents[height] = initialStackElement(height, SourceLocation()); - for (int height = minHeight; height <= m_stackHeight; ++height) - targetStackContents[height] = stackElement(height, SourceLocation()); + int minHeight = m_state.stackHeight() + 1; + if (!m_state.stackElements().empty()) + minHeight = min(minHeight, m_state.stackElements().begin()->first); + for (int height = minHeight; height <= m_initialState.stackHeight(); ++height) + initialStackContents[height] = m_initialState.stackElement(height, SourceLocation()); + for (int height = minHeight; height <= m_state.stackHeight(); ++height) + targetStackContents[height] = m_state.stackElement(height, SourceLocation()); // Debug info: //stream(cout, initialStackContents, targetStackContents); - AssemblyItems items = CSECodeGenerator(m_expressionClasses, m_storeOperations).generateCode( + AssemblyItems items = CSECodeGenerator(m_state.expressionClasses(), m_storeOperations).generateCode( + m_initialState.stackHeight(), initialStackContents, targetStackContents ); @@ -57,103 +58,11 @@ vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems() return items; } -ostream& CommonSubexpressionEliminator::stream( - ostream& _out, - map<int, Id> _initialStack, - map<int, Id> _targetStack -) const -{ - auto streamExpressionClass = [this](ostream& _out, Id _id) - { - auto const& expr = m_expressionClasses.representative(_id); - _out << " " << dec << _id << ": " << *expr.item; - if (expr.sequenceNumber) - _out << "@" << dec << expr.sequenceNumber; - _out << "("; - for (Id arg: expr.arguments) - _out << dec << arg << ","; - _out << ")" << endl; - }; - - _out << "Optimizer analysis:" << endl; - _out << "Final stack height: " << dec << m_stackHeight << endl; - _out << "Equivalence classes: " << endl; - for (Id eqClass = 0; eqClass < m_expressionClasses.size(); ++eqClass) - streamExpressionClass(_out, eqClass); - - _out << "Initial stack: " << endl; - for (auto const& it: _initialStack) - { - _out << " " << dec << it.first << ": "; - streamExpressionClass(_out, it.second); - } - _out << "Target stack: " << endl; - for (auto const& it: _targetStack) - { - _out << " " << dec << it.first << ": "; - streamExpressionClass(_out, it.second); - } - - return _out; -} - void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _copyItem) { - if (_item.type() != Operation) - { - assertThrow(_item.deposit() == 1, InvalidDeposit, ""); - setStackElement(++m_stackHeight, m_expressionClasses.find(_item, {}, _copyItem)); - } - else - { - Instruction instruction = _item.instruction(); - InstructionInfo info = instructionInfo(instruction); - if (SemanticInformation::isDupInstruction(_item)) - setStackElement( - m_stackHeight + 1, - stackElement( - m_stackHeight - int(instruction) + int(Instruction::DUP1), - _item.getLocation() - ) - ); - else if (SemanticInformation::isSwapInstruction(_item)) - swapStackElements( - m_stackHeight, - m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1), - _item.getLocation() - ); - else if (instruction != Instruction::POP) - { - vector<Id> arguments(info.args); - for (int i = 0; i < info.args; ++i) - arguments[i] = stackElement(m_stackHeight - i, _item.getLocation()); - if (_item.instruction() == Instruction::SSTORE) - storeInStorage(arguments[0], arguments[1], _item.getLocation()); - else if (_item.instruction() == Instruction::SLOAD) - setStackElement( - m_stackHeight + _item.deposit(), - loadFromStorage(arguments[0], _item.getLocation()) - ); - else if (_item.instruction() == Instruction::MSTORE) - storeInMemory(arguments[0], arguments[1], _item.getLocation()); - else if (_item.instruction() == Instruction::MLOAD) - setStackElement( - m_stackHeight + _item.deposit(), - loadFromMemory(arguments[0], _item.getLocation()) - ); - else if (_item.instruction() == Instruction::SHA3) - setStackElement( - m_stackHeight + _item.deposit(), - applySha3(arguments.at(0), arguments.at(1), _item.getLocation()) - ); - else - setStackElement( - m_stackHeight + _item.deposit(), - m_expressionClasses.find(_item, arguments, _copyItem) - ); - } - m_stackHeight += _item.deposit(); - } + StoreOperation op = m_state.feedItem(_item, _copyItem); + if (op.isValid()) + m_storeOperations.push_back(op); } void CommonSubexpressionEliminator::optimizeBreakingItem() @@ -164,20 +73,20 @@ void CommonSubexpressionEliminator::optimizeBreakingItem() SourceLocation const& location = m_breakingItem->getLocation(); AssemblyItem::JumpType jumpType = m_breakingItem->getJumpType(); - Id condition = stackElement(m_stackHeight - 1, location); - Id zero = m_expressionClasses.find(u256(0)); - if (m_expressionClasses.knownToBeDifferent(condition, zero)) + Id condition = m_state.stackElement(m_state.stackHeight() - 1, location); + Id zero = m_state.expressionClasses().find(u256(0)); + if (m_state.expressionClasses().knownToBeDifferent(condition, zero)) { feedItem(AssemblyItem(Instruction::SWAP1, location), true); feedItem(AssemblyItem(Instruction::POP, location), true); AssemblyItem item(Instruction::JUMP, location); item.setJumpType(jumpType); - m_breakingItem = m_expressionClasses.storeItem(item); + m_breakingItem = m_state.expressionClasses().storeItem(item); return; } - Id negatedCondition = m_expressionClasses.find(Instruction::ISZERO, {condition}); - if (m_expressionClasses.knownToBeDifferent(negatedCondition, zero)) + Id negatedCondition = m_state.expressionClasses().find(Instruction::ISZERO, {condition}); + if (m_state.expressionClasses().knownToBeDifferent(negatedCondition, zero)) { AssemblyItem it(Instruction::POP, location); feedItem(it, true); @@ -186,148 +95,6 @@ void CommonSubexpressionEliminator::optimizeBreakingItem() } } -void CommonSubexpressionEliminator::setStackElement(int _stackHeight, Id _class) -{ - m_stackElements[_stackHeight] = _class; -} - -void CommonSubexpressionEliminator::swapStackElements( - int _stackHeightA, - int _stackHeightB, - SourceLocation const& _location -) -{ - assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements."); - // ensure they are created - stackElement(_stackHeightA, _location); - stackElement(_stackHeightB, _location); - - swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]); -} - -ExpressionClasses::Id CommonSubexpressionEliminator::stackElement( - int _stackHeight, - SourceLocation const& _location -) -{ - if (m_stackElements.count(_stackHeight)) - return m_stackElements.at(_stackHeight); - // Stack element not found (not assigned yet), create new equivalence class. - return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location); -} - -ExpressionClasses::Id CommonSubexpressionEliminator::initialStackElement( - int _stackHeight, - SourceLocation const& _location -) -{ - assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested."); - assertThrow(_stackHeight > -16, StackTooDeepException, ""); - // This is a special assembly item that refers to elements pre-existing on the initial stack. - return m_expressionClasses.find(AssemblyItem(dupInstruction(1 - _stackHeight), _location)); -} - -void CommonSubexpressionEliminator::storeInStorage(Id _slot, Id _value, SourceLocation const& _location) -{ - if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value) - // do not execute the storage if we know that the value is already there - return; - m_sequenceNumber++; - decltype(m_storageContent) storageContents; - // Copy over all values (i.e. retain knowledge about them) where we know that this store - // operation will not destroy the knowledge. Specifically, we copy storage locations we know - // are different from _slot or locations where we know that the stored value is equal to _value. - for (auto const& storageItem: m_storageContent) - if (m_expressionClasses.knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value) - storageContents.insert(storageItem); - m_storageContent = move(storageContents); - - AssemblyItem item(Instruction::SSTORE, _location); - Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber); - m_storeOperations.push_back(StoreOperation(StoreOperation::Storage, _slot, m_sequenceNumber, id)); - m_storageContent[_slot] = _value; - // increment a second time so that we get unique sequence numbers for writes - m_sequenceNumber++; -} - -ExpressionClasses::Id CommonSubexpressionEliminator::loadFromStorage(Id _slot, SourceLocation const& _location) -{ - if (m_storageContent.count(_slot)) - return m_storageContent.at(_slot); - - AssemblyItem item(Instruction::SLOAD, _location); - return m_storageContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber); -} - -void CommonSubexpressionEliminator::storeInMemory(Id _slot, Id _value, SourceLocation const& _location) -{ - if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value) - // do not execute the store if we know that the value is already there - return; - m_sequenceNumber++; - decltype(m_memoryContent) memoryContents; - // copy over values at points where we know that they are different from _slot by at least 32 - for (auto const& memoryItem: m_memoryContent) - if (m_expressionClasses.knownToBeDifferentBy32(memoryItem.first, _slot)) - memoryContents.insert(memoryItem); - m_memoryContent = move(memoryContents); - - AssemblyItem item(Instruction::MSTORE, _location); - Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber); - m_storeOperations.push_back(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id)); - m_memoryContent[_slot] = _value; - // increment a second time so that we get unique sequence numbers for writes - m_sequenceNumber++; -} - -ExpressionClasses::Id CommonSubexpressionEliminator::loadFromMemory(Id _slot, SourceLocation const& _location) -{ - if (m_memoryContent.count(_slot)) - return m_memoryContent.at(_slot); - - AssemblyItem item(Instruction::MLOAD, _location); - return m_memoryContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber); -} - -CommonSubexpressionEliminator::Id CommonSubexpressionEliminator::applySha3( - Id _start, - Id _length, - SourceLocation const& _location -) -{ - AssemblyItem sha3Item(Instruction::SHA3, _location); - // Special logic if length is a short constant, otherwise we cannot tell. - u256 const* l = m_expressionClasses.knownConstant(_length); - // unknown or too large length - if (!l || *l > 128) - return m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber); - - vector<Id> arguments; - for (u256 i = 0; i < *l; i += 32) - { - Id slot = m_expressionClasses.find( - AssemblyItem(Instruction::ADD, _location), - {_start, m_expressionClasses.find(i)} - ); - arguments.push_back(loadFromMemory(slot, _location)); - } - if (m_knownSha3Hashes.count(arguments)) - return m_knownSha3Hashes.at(arguments); - Id v; - // If all arguments are known constants, compute the sha3 here - if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses.knownConstant(_a); })) - { - bytes data; - for (Id a: arguments) - data += toBigEndian(*m_expressionClasses.knownConstant(a)); - data.resize(size_t(*l)); - v = m_expressionClasses.find(AssemblyItem(u256(sha3(data)), _location)); - } - else - v = m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber); - return m_knownSha3Hashes[arguments] = v; -} - CSECodeGenerator::CSECodeGenerator( ExpressionClasses& _expressionClasses, vector<CSECodeGenerator::StoreOperation> const& _storeOperations @@ -339,10 +106,12 @@ CSECodeGenerator::CSECodeGenerator( } AssemblyItems CSECodeGenerator::generateCode( + int _initialStackHeight, map<int, Id> const& _initialStack, map<int, Id> const& _targetStackContents ) { + m_stackHeight = _initialStackHeight; m_stack = _initialStack; for (auto const& item: m_stack) if (!m_classPositions.count(item.second)) diff --git a/CommonSubexpressionEliminator.h b/CommonSubexpressionEliminator.h index 6156bc81..6e1ba40b 100644 --- a/CommonSubexpressionEliminator.h +++ b/CommonSubexpressionEliminator.h @@ -32,6 +32,7 @@ #include <libdevcore/Exceptions.h> #include <libevmasm/ExpressionClasses.h> #include <libevmasm/SemanticInformation.h> +#include <libevmasm/KnownState.h> namespace dev { @@ -58,20 +59,9 @@ class CommonSubexpressionEliminator { public: using Id = ExpressionClasses::Id; - struct StoreOperation - { - enum Target { Memory, Storage }; - StoreOperation( - Target _target, - Id _slot, - unsigned _sequenceNumber, - Id _expression - ): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {} - Target target; - Id slot; - unsigned sequenceNumber; - Id expression; - }; + using StoreOperation = KnownState::StoreOperation; + + CommonSubexpressionEliminator(KnownState const& _state): m_initialState(_state), m_state(_state) {} /// Feeds AssemblyItems into the eliminator and @returns the iterator pointing at the first /// item that must be fed into a new instance of the eliminator. @@ -95,49 +85,11 @@ private: /// Tries to optimize the item that breaks the basic block at the end. void optimizeBreakingItem(); - /// Simplifies the given item using - /// Assigns a new equivalence class to the next sequence number of the given stack element. - void setStackElement(int _stackHeight, Id _class); - /// Swaps the given stack elements in their next sequence number. - void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location); - /// Retrieves the current equivalence class fo the given stack element (or generates a new - /// one if it does not exist yet). - Id stackElement(int _stackHeight, SourceLocation const& _location); - /// @returns the equivalence class id of the special initial stack element at the given height - /// (must not be positive). - Id initialStackElement(int _stackHeight, SourceLocation const& _location); - - /// Increments the sequence number, deletes all storage information that might be overwritten - /// and stores the new value at the given slot. - void storeInStorage(Id _slot, Id _value, SourceLocation const& _location); - /// Retrieves the current value at the given slot in storage or creates a new special sload class. - Id loadFromStorage(Id _slot, SourceLocation const& _location); - /// Increments the sequence number, deletes all memory information that might be overwritten - /// and stores the new value at the given slot. - void storeInMemory(Id _slot, Id _value, SourceLocation const& _location); - /// Retrieves the current value at the given slot in memory or creates a new special mload class. - Id loadFromMemory(Id _slot, SourceLocation const& _location); - /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory. - Id applySha3(Id _start, Id _length, SourceLocation const& _location); - - /// Current stack height, can be negative. - int m_stackHeight = 0; - /// Current stack layout, mapping stack height -> equivalence class - std::map<int, Id> m_stackElements; - /// Current sequence number, this is incremented with each modification to storage or memory. - unsigned m_sequenceNumber = 1; - /// Knowledge about storage content. - std::map<Id, Id> m_storageContent; - /// Knowledge about memory content. Keys are memory addresses, note that the values overlap - /// and are not contained here if they are not completely known. - std::map<Id, Id> m_memoryContent; - /// Keeps record of all sha3 hashes that are computed. - std::map<std::vector<Id>, Id> m_knownSha3Hashes; + KnownState m_initialState; + KnownState m_state; /// Keeps information about which storage or memory slots were written to at which sequence /// number with what instruction. std::vector<StoreOperation> m_storeOperations; - /// Structure containing the classes of equivalent expressions. - ExpressionClasses m_expressionClasses; /// The item that breaks the basic block, can be nullptr. /// It is usually appended to the block but can be optimized in some cases. @@ -164,6 +116,7 @@ public: /// @param _targetStackContents final contents of the stack, by stack height relative to initial /// @note should only be called once on each object. AssemblyItems generateCode( + int _initialStackHeight, std::map<int, Id> const& _initialStack, std::map<int, Id> const& _targetStackContents ); @@ -199,7 +152,7 @@ private: AssemblyItems m_generatedItems; /// Current height of the stack relative to the start. - int m_stackHeight = 0; + int m_stackHeight; /// If (b, a) is in m_requests then b is needed to compute a. std::multimap<Id, Id> m_neededBy; /// Current content of the stack. diff --git a/ControlFlowGraph.cpp b/ControlFlowGraph.cpp index cc4367e6..2e28317a 100644 --- a/ControlFlowGraph.cpp +++ b/ControlFlowGraph.cpp @@ -23,9 +23,11 @@ #include <libevmasm/ControlFlowGraph.h> #include <map> +#include <memory> #include <libevmasm/Exceptions.h> #include <libevmasm/AssemblyItem.h> #include <libevmasm/SemanticInformation.h> +#include <libevmasm/KnownState.h> using namespace std; using namespace dev; @@ -36,16 +38,17 @@ BlockId::BlockId(u256 const& _id): m_id(_id) assertThrow( _id < initial().m_id, OptimizerException, "Tag number too large."); } -AssemblyItems ControlFlowGraph::optimisedItems() +BasicBlocks ControlFlowGraph::optimisedBlocks() { if (m_items.empty()) - return m_items; + return BasicBlocks(); findLargestTag(); splitBlocks(); resolveNextLinks(); removeUnusedBlocks(); setPrevLinks(); + gatherKnowledge(); return rebuildCode(); } @@ -209,7 +212,78 @@ void ControlFlowGraph::setPrevLinks() } } -AssemblyItems ControlFlowGraph::rebuildCode() +void ControlFlowGraph::gatherKnowledge() +{ + // @todo actually we know that memory is filled with zeros at the beginning, + // we could make use of that. + KnownStatePointer emptyState = make_shared<KnownState>(); + ExpressionClasses& expr = emptyState->expressionClasses(); + bool unknownJumpEncountered = false; + + vector<pair<BlockId, KnownStatePointer>> workQueue({make_pair(BlockId::initial(), emptyState->copy())}); + while (!workQueue.empty()) + { + //@todo we might have to do something like incrementing the sequence number for each JUMPDEST + assertThrow(!!workQueue.back().first, OptimizerException, ""); + BasicBlock& block = m_blocks.at(workQueue.back().first); + KnownStatePointer state = workQueue.back().second; + workQueue.pop_back(); + if (block.startState) + { + state->reduceToCommonKnowledge(*block.startState); + if (*state == *block.startState) + continue; + } + + block.startState = state->copy(); + //@todo we might know the return address for the first pass, but not anymore for the second, + // -> store knowledge about tags as a union. + + // Feed all items except for the final jump yet because it will erase the target tag. + unsigned pc = block.begin; + while (pc < block.end && !SemanticInformation::altersControlFlow(m_items.at(pc))) + state->feedItem(m_items.at(pc++)); + + if ( + block.endType == BasicBlock::EndType::JUMP || + block.endType == BasicBlock::EndType::JUMPI + ) + { + assertThrow(block.begin <= pc && pc == block.end - 1, OptimizerException, ""); + //@todo in the case of JUMPI, add knowledge about the condition to the state + // (for both values of the condition) + BlockId nextBlock = expressionClassToBlockId( + state->stackElement(state->stackHeight(), SourceLocation()), + expr + ); + state->feedItem(m_items.at(pc++)); + if (nextBlock) + workQueue.push_back(make_pair(nextBlock, state->copy())); + else if (!unknownJumpEncountered) + { + // We do not know where this jump goes, so we have to reset the states of all + // JUMPDESTs. + unknownJumpEncountered = true; + for (auto const& it: m_blocks) + if (it.second.begin < it.second.end && m_items[it.second.begin].type() == Tag) + workQueue.push_back(make_pair(it.first, emptyState->copy())); + } + } + else if (block.begin <= pc && pc < block.end) + state->feedItem(m_items.at(pc++)); + assertThrow(block.end <= block.begin || pc == block.end, OptimizerException, ""); + + block.endState = state; + + if ( + block.endType == BasicBlock::EndType::HANDOVER || + block.endType == BasicBlock::EndType::JUMPI + ) + workQueue.push_back(make_pair(block.next, state->copy())); + } +} + +BasicBlocks ControlFlowGraph::rebuildCode() { map<BlockId, unsigned> pushes; for (auto& idAndBlock: m_blocks) @@ -220,7 +294,7 @@ AssemblyItems ControlFlowGraph::rebuildCode() for (auto it: m_blocks) blocksToAdd.insert(it.first); set<BlockId> blocksAdded; - AssemblyItems code; + BasicBlocks blocks; for ( BlockId blockId = BlockId::initial(); @@ -233,23 +307,34 @@ AssemblyItems ControlFlowGraph::rebuildCode() blockId = m_blocks.at(blockId).prev; for (; blockId; blockId = m_blocks.at(blockId).next) { - BasicBlock const& block = m_blocks.at(blockId); + BasicBlock& block = m_blocks.at(blockId); blocksToAdd.erase(blockId); blocksAdded.insert(blockId); - auto begin = m_items.begin() + block.begin; - auto end = m_items.begin() + block.end; - if (begin == end) + if (block.begin == block.end) continue; // If block starts with unused tag, skip it. - if (previousHandedOver && !pushes[blockId] && begin->type() == Tag) - ++begin; + if (previousHandedOver && !pushes[blockId] && m_items[block.begin].type() == Tag) + ++block.begin; + if (block.begin < block.end) + blocks.push_back(block); previousHandedOver = (block.endType == BasicBlock::EndType::HANDOVER); - copy(begin, end, back_inserter(code)); } } - return code; + return blocks; +} + +BlockId ControlFlowGraph::expressionClassToBlockId( + ExpressionClasses::Id _id, + ExpressionClasses& _exprClasses +) +{ + ExpressionClasses::Expression expr = _exprClasses.representative(_id); + if (expr.item && expr.item->type() == PushTag) + return BlockId(expr.item->data()); + else + return BlockId::invalid(); } BlockId ControlFlowGraph::generateNewId() diff --git a/ControlFlowGraph.h b/ControlFlowGraph.h index 5d16df32..3366dc45 100644 --- a/ControlFlowGraph.h +++ b/ControlFlowGraph.h @@ -24,16 +24,18 @@ #pragma once #include <vector> +#include <memory> #include <libdevcore/Common.h> #include <libdevcore/Assertions.h> +#include <libevmasm/ExpressionClasses.h> namespace dev { namespace eth { -class AssemblyItem; -using AssemblyItems = std::vector<AssemblyItem>; +class KnownState; +using KnownStatePointer = std::shared_ptr<KnownState>; /** * Identifier for a block, coincides with the tag number of an AssemblyItem but adds a special @@ -69,32 +71,46 @@ struct BasicBlock unsigned end = 0; /// Tags pushed inside this block, with multiplicity. std::vector<BlockId> pushedTags; - /// ID of the block that always follows this one (either JUMP or flow into new block), - /// or BlockId::invalid() otherwise + /// ID of the block that always follows this one (either non-branching part of JUMPI or flow + /// into new block), or BlockId::invalid() otherwise BlockId next = BlockId::invalid(); - /// ID of the block that has to precede this one. + /// ID of the block that has to precede this one (because control flows into it). BlockId prev = BlockId::invalid(); enum class EndType { JUMP, JUMPI, STOP, HANDOVER }; EndType endType = EndType::HANDOVER; + + /// Knowledge about the state when this block is entered. Intersection of all possible ways + /// to enter this block. + KnownStatePointer startState; + /// Knowledge about the state at the end of this block. + KnownStatePointer endState; }; +using BasicBlocks = std::vector<BasicBlock>; + class ControlFlowGraph { public: /// Initializes the control flow graph. /// @a _items has to persist across the usage of this class. ControlFlowGraph(AssemblyItems const& _items): m_items(_items) {} - /// @returns the collection of optimised items, should be called only once. - AssemblyItems optimisedItems(); + /// @returns vector of basic blocks in the order they should be used in the final code. + /// Should be called only once. + BasicBlocks optimisedBlocks(); private: void findLargestTag(); void splitBlocks(); void resolveNextLinks(); void removeUnusedBlocks(); + void gatherKnowledge(); void setPrevLinks(); - AssemblyItems rebuildCode(); + BasicBlocks rebuildCode(); + + /// @returns the corresponding BlockId if _id is a pushed jump tag, + /// and an invalid BlockId otherwise. + BlockId expressionClassToBlockId(ExpressionClasses::Id _id, ExpressionClasses& _exprClasses); BlockId generateNewId(); diff --git a/ExpressionClasses.cpp b/ExpressionClasses.cpp index 1e60a7fe..cfbeba7f 100644 --- a/ExpressionClasses.cpp +++ b/ExpressionClasses.cpp @@ -37,6 +37,7 @@ using namespace dev::eth; bool ExpressionClasses::Expression::operator<(ExpressionClasses::Expression const& _other) const { + assertThrow(!!item && !!_other.item, OptimizerException, ""); auto type = item->type(); auto otherType = _other.item->type(); return std::tie(type, item->data(), arguments, sequenceNumber) < @@ -56,12 +57,15 @@ ExpressionClasses::Id ExpressionClasses::find( exp.arguments = _arguments; exp.sequenceNumber = _sequenceNumber; - if (SemanticInformation::isCommutativeOperation(_item)) - sort(exp.arguments.begin(), exp.arguments.end()); + if (SemanticInformation::isDeterministic(_item)) + { + if (SemanticInformation::isCommutativeOperation(_item)) + sort(exp.arguments.begin(), exp.arguments.end()); - auto it = m_expressions.find(exp); - if (it != m_expressions.end()) - return it->id; + auto it = m_expressions.find(exp); + if (it != m_expressions.end()) + return it->id; + } if (_copyItem) exp.item = storeItem(_item); @@ -122,10 +126,16 @@ string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) const { Expression const& expr = representative(_id); stringstream str; - str << dec << expr.id << ":" << *expr.item << "("; - for (Id arg: expr.arguments) - str << fullDAGToString(arg) << ","; - str << ")"; + str << dec << expr.id << ":"; + if (expr.item) + { + str << *expr.item << "("; + for (Id arg: expr.arguments) + str << fullDAGToString(arg) << ","; + str << ")"; + } + else + str << " UNIQUE"; return str.str(); } @@ -279,7 +289,11 @@ ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, { static Rules rules; - if (_expr.item->type() != Operation) + if ( + !_expr.item || + _expr.item->type() != Operation || + !SemanticInformation::isDeterministic(*_expr.item) + ) return -1; for (auto const& rule: rules.rules()) @@ -337,7 +351,7 @@ void Pattern::setMatchGroup(unsigned _group, map<unsigned, Expression const*>& _ bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const { - if (!matchesBaseItem(*_expr.item)) + if (!matchesBaseItem(_expr.item)) return false; if (m_matchGroup) { @@ -387,13 +401,15 @@ string Pattern::toString() const return s.str(); } -bool Pattern::matchesBaseItem(AssemblyItem const& _item) const +bool Pattern::matchesBaseItem(AssemblyItem const* _item) const { if (m_type == UndefinedItem) return true; - if (m_type != _item.type()) + if (!_item) + return false; + if (m_type != _item->type()) return false; - if (m_requireDataMatch && m_data != _item.data()) + if (m_requireDataMatch && m_data != _item->data()) return false; return true; } diff --git a/ExpressionClasses.h b/ExpressionClasses.h index 2f720f60..c8352030 100644 --- a/ExpressionClasses.h +++ b/ExpressionClasses.h @@ -50,7 +50,7 @@ public: struct Expression { Id id; - AssemblyItem const* item; + AssemblyItem const* item = nullptr; Ids arguments; unsigned sequenceNumber; ///< Storage modification sequence, only used for SLOAD/SSTORE instructions. /// Behaves as if this was a tuple of (item->type(), item->data(), arguments, sequenceNumber). @@ -149,7 +149,7 @@ public: std::string toString() const; private: - bool matchesBaseItem(AssemblyItem const& _item) const; + bool matchesBaseItem(AssemblyItem const* _item) const; Expression const& matchGroupValue() const; AssemblyItemType m_type; diff --git a/KnownState.cpp b/KnownState.cpp new file mode 100644 index 00000000..41ac4802 --- /dev/null +++ b/KnownState.cpp @@ -0,0 +1,326 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file KnownState.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + * Contains knowledge about the state of the virtual machine at a specific instruction. + */ + +#include "KnownState.h" +#include <functional> +#include <libdevcrypto/SHA3.h> +#include <libevmasm/AssemblyItem.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + +ostream& KnownState::stream(ostream& _out) const +{ + auto streamExpressionClass = [this](ostream& _out, Id _id) + { + auto const& expr = m_expressionClasses->representative(_id); + _out << " " << dec << _id << ": "; + if (!expr.item) + _out << " no item"; + else if (expr.item->type() == UndefinedItem) + _out << " unknown " << int(expr.item->data()); + else + _out << *expr.item; + if (expr.sequenceNumber) + _out << "@" << dec << expr.sequenceNumber; + _out << "("; + for (Id arg: expr.arguments) + _out << dec << arg << ","; + _out << ")" << endl; + }; + + _out << "=== State ===" << endl; + _out << "Stack height: " << dec << m_stackHeight << endl; + _out << "Equivalence classes: " << endl; + for (Id eqClass = 0; eqClass < m_expressionClasses->size(); ++eqClass) + streamExpressionClass(_out, eqClass); + + _out << "Stack: " << endl; + for (auto const& it: m_stackElements) + { + _out << " " << dec << it.first << ": "; + streamExpressionClass(_out, it.second); + } + _out << "Storage: " << endl; + for (auto const& it: m_storageContent) + { + _out << " "; + streamExpressionClass(_out, it.first); + _out << ": "; + streamExpressionClass(_out, it.second); + } + _out << "Memory: " << endl; + for (auto const& it: m_memoryContent) + { + _out << " "; + streamExpressionClass(_out, it.first); + _out << ": "; + streamExpressionClass(_out, it.second); + } + + return _out; +} + +KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool _copyItem) +{ + StoreOperation op; + if (_item.type() == Tag) + { + // can be ignored + } + else if (_item.type() != Operation) + { + assertThrow(_item.deposit() == 1, InvalidDeposit, ""); + setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem)); + } + else + { + Instruction instruction = _item.instruction(); + InstructionInfo info = instructionInfo(instruction); + if (SemanticInformation::isDupInstruction(_item)) + setStackElement( + m_stackHeight + 1, + stackElement( + m_stackHeight - int(instruction) + int(Instruction::DUP1), + _item.getLocation() + ) + ); + else if (SemanticInformation::isSwapInstruction(_item)) + swapStackElements( + m_stackHeight, + m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1), + _item.getLocation() + ); + else if (instruction != Instruction::POP) + { + vector<Id> arguments(info.args); + for (int i = 0; i < info.args; ++i) + arguments[i] = stackElement(m_stackHeight - i, _item.getLocation()); + + if (_item.instruction() == Instruction::SSTORE) + op = storeInStorage(arguments[0], arguments[1], _item.getLocation()); + else if (_item.instruction() == Instruction::SLOAD) + setStackElement( + m_stackHeight + _item.deposit(), + loadFromStorage(arguments[0], _item.getLocation()) + ); + else if (_item.instruction() == Instruction::MSTORE) + op = storeInMemory(arguments[0], arguments[1], _item.getLocation()); + else if (_item.instruction() == Instruction::MLOAD) + setStackElement( + m_stackHeight + _item.deposit(), + loadFromMemory(arguments[0], _item.getLocation()) + ); + else if (_item.instruction() == Instruction::SHA3) + setStackElement( + m_stackHeight + _item.deposit(), + applySha3(arguments.at(0), arguments.at(1), _item.getLocation()) + ); + else + { + if (SemanticInformation::invalidatesMemory(_item.instruction())) + resetMemory(); + if (SemanticInformation::invalidatesStorage(_item.instruction())) + resetStorage(); + assertThrow(info.ret <= 1, InvalidDeposit, ""); + if (info.ret == 1) + setStackElement( + m_stackHeight + _item.deposit(), + m_expressionClasses->find(_item, arguments, _copyItem) + ); + } + } + m_stackElements.erase( + m_stackElements.upper_bound(m_stackHeight + _item.deposit()), + m_stackElements.end() + ); + m_stackHeight += _item.deposit(); + } + return op; +} + +void KnownState::reduceToCommonKnowledge(KnownState const& /*_other*/) +{ + //@todo + *this = KnownState(m_expressionClasses); +} + +bool KnownState::operator==(const KnownState& _other) const +{ + //@todo + return ( + m_stackElements.empty() && + _other.m_stackElements.empty() && + m_storageContent.empty() && + _other.m_storageContent.empty() && + m_memoryContent.empty() && + _other.m_memoryContent.empty() + ); +} + +ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location) +{ + if (m_stackElements.count(_stackHeight)) + return m_stackElements.at(_stackHeight); + // Stack element not found (not assigned yet), create new unknown equivalence class. + //@todo check that we do not infer incorrect equivalences when the stack is cleared partially + //in between. + return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location); +} + +ExpressionClasses::Id KnownState::initialStackElement( + int _stackHeight, + SourceLocation const& _location +) +{ + // This is a special assembly item that refers to elements pre-existing on the initial stack. + return m_expressionClasses->find(AssemblyItem(UndefinedItem, u256(_stackHeight), _location)); +} + +void KnownState::setStackElement(int _stackHeight, Id _class) +{ + m_stackElements[_stackHeight] = _class; +} + +void KnownState::swapStackElements( + int _stackHeightA, + int _stackHeightB, + SourceLocation const& _location +) +{ + assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements."); + // ensure they are created + stackElement(_stackHeightA, _location); + stackElement(_stackHeightB, _location); + + swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]); +} + +KnownState::StoreOperation KnownState::storeInStorage( + Id _slot, + Id _value, + SourceLocation const& _location) +{ + if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value) + // do not execute the storage if we know that the value is already there + return StoreOperation(); + m_sequenceNumber++; + decltype(m_storageContent) storageContents; + // Copy over all values (i.e. retain knowledge about them) where we know that this store + // operation will not destroy the knowledge. Specifically, we copy storage locations we know + // are different from _slot or locations where we know that the stored value is equal to _value. + for (auto const& storageItem: m_storageContent) + if (m_expressionClasses->knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value) + storageContents.insert(storageItem); + m_storageContent = move(storageContents); + + AssemblyItem item(Instruction::SSTORE, _location); + Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber); + StoreOperation operation(StoreOperation::Storage, _slot, m_sequenceNumber, id); + m_storageContent[_slot] = _value; + // increment a second time so that we get unique sequence numbers for writes + m_sequenceNumber++; + + return operation; +} + +ExpressionClasses::Id KnownState::loadFromStorage(Id _slot, SourceLocation const& _location) +{ + if (m_storageContent.count(_slot)) + return m_storageContent.at(_slot); + + AssemblyItem item(Instruction::SLOAD, _location); + return m_storageContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber); +} + +KnownState::StoreOperation KnownState::storeInMemory(Id _slot, Id _value, SourceLocation const& _location) +{ + if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value) + // do not execute the store if we know that the value is already there + return StoreOperation(); + m_sequenceNumber++; + decltype(m_memoryContent) memoryContents; + // copy over values at points where we know that they are different from _slot by at least 32 + for (auto const& memoryItem: m_memoryContent) + if (m_expressionClasses->knownToBeDifferentBy32(memoryItem.first, _slot)) + memoryContents.insert(memoryItem); + m_memoryContent = move(memoryContents); + + AssemblyItem item(Instruction::MSTORE, _location); + Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber); + StoreOperation operation(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id)); + m_memoryContent[_slot] = _value; + // increment a second time so that we get unique sequence numbers for writes + m_sequenceNumber++; + return operation; +} + +ExpressionClasses::Id KnownState::loadFromMemory(Id _slot, SourceLocation const& _location) +{ + if (m_memoryContent.count(_slot)) + return m_memoryContent.at(_slot); + + AssemblyItem item(Instruction::MLOAD, _location); + return m_memoryContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber); +} + +KnownState::Id KnownState::applySha3( + Id _start, + Id _length, + SourceLocation const& _location +) +{ + AssemblyItem sha3Item(Instruction::SHA3, _location); + // Special logic if length is a short constant, otherwise we cannot tell. + u256 const* l = m_expressionClasses->knownConstant(_length); + // unknown or too large length + if (!l || *l > 128) + return m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber); + + vector<Id> arguments; + for (u256 i = 0; i < *l; i += 32) + { + Id slot = m_expressionClasses->find( + AssemblyItem(Instruction::ADD, _location), + {_start, m_expressionClasses->find(i)} + ); + arguments.push_back(loadFromMemory(slot, _location)); + } + if (m_knownSha3Hashes.count(arguments)) + return m_knownSha3Hashes.at(arguments); + Id v; + // If all arguments are known constants, compute the sha3 here + if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses->knownConstant(_a); })) + { + bytes data; + for (Id a: arguments) + data += toBigEndian(*m_expressionClasses->knownConstant(a)); + data.resize(size_t(*l)); + v = m_expressionClasses->find(AssemblyItem(u256(sha3(data)), _location)); + } + else + v = m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber); + return m_knownSha3Hashes[arguments] = v; +} + diff --git a/KnownState.h b/KnownState.h new file mode 100644 index 00000000..f7a3dd67 --- /dev/null +++ b/KnownState.h @@ -0,0 +1,163 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file KnownState.h + * @author Christian <c@ethdev.com> + * @date 2015 + * Contains knowledge about the state of the virtual machine at a specific instruction. + */ + +#pragma once + +#include <vector> +#include <map> +#include <set> +#include <tuple> +#include <memory> +#include <ostream> +#include <libdevcore/CommonIO.h> +#include <libdevcore/Exceptions.h> +#include <libevmasm/ExpressionClasses.h> +#include <libevmasm/SemanticInformation.h> + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector<AssemblyItem>; + +/** + * Class to infer and store knowledge about the state of the virtual machine at a specific + * instruction. + * + * The general workings are that for each assembly item that is fed, an equivalence class is + * derived from the operation and the equivalence class of its arguments. DUPi, SWAPi and some + * arithmetic instructions are used to infer equivalences while these classes are determined. + */ +class KnownState +{ +public: + using Id = ExpressionClasses::Id; + struct StoreOperation + { + enum Target { Invalid, Memory, Storage }; + StoreOperation(): target(Invalid), sequenceNumber(-1) {} + StoreOperation( + Target _target, + Id _slot, + unsigned _sequenceNumber, + Id _expression + ): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {} + bool isValid() const { return target != Invalid; } + Target target; + Id slot; + unsigned sequenceNumber; + Id expression; + }; + + explicit KnownState( + std::shared_ptr<ExpressionClasses> _expressionClasses = std::make_shared<ExpressionClasses>() + ): m_expressionClasses(_expressionClasses) + { + } + + /// Streams debugging information to @a _out. + std::ostream& stream(std::ostream& _out) const; + + /// Feeds the item into the system for analysis. + /// @returns a possible store operation + StoreOperation feedItem(AssemblyItem const& _item, bool _copyItem = false); + + /// Resets any knowledge about storage. + void resetStorage() { m_storageContent.clear(); } + /// Resets any knowledge about storage. + void resetMemory() { m_memoryContent.clear(); } + /// Resets any knowledge about the current stack. + void resetStack() { m_stackElements.clear(); m_stackHeight = 0; } + /// Resets any knowledge. + void reset() { resetStorage(); resetMemory(); resetStack(); } + + /// Manually increments the storage and memory sequence number. + void incrementSequenceNumber() { m_sequenceNumber += 2; } + + /// Replaces the state by the intersection with _other, i.e. only equal knowledge is retained. + /// If the stack heighht is different, the smaller one is used and the stack is compared + /// relatively. + void reduceToCommonKnowledge(KnownState const& _other); + + /// @returns a shared pointer to a copy of this state. + std::shared_ptr<KnownState> copy() const { return std::make_shared<KnownState>(*this); } + + /// @returns true if the knowledge about the state of both objects is (known to be) equal. + bool operator==(KnownState const& _other) const; + + ///@todo the sequence numbers in two copies of this class should never be the same. + /// might be doable using two-dimensional sequence numbers, where the first value is incremented + /// for each copy + + /// Retrieves the current equivalence class fo the given stack element (or generates a new + /// one if it does not exist yet). + Id stackElement(int _stackHeight, SourceLocation const& _location); + /// @returns the equivalence class id of the special initial stack element at the given height. + Id initialStackElement(int _stackHeight, SourceLocation const& _location); + + int stackHeight() const { return m_stackHeight; } + std::map<int, Id> const& stackElements() const { return m_stackElements; } + ExpressionClasses& expressionClasses() const { return *m_expressionClasses; } + +private: + /// Assigns a new equivalence class to the next sequence number of the given stack element. + void setStackElement(int _stackHeight, Id _class); + /// Swaps the given stack elements in their next sequence number. + void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location); + + /// Increments the sequence number, deletes all storage information that might be overwritten + /// and stores the new value at the given slot. + /// @returns the store operation, which might be invalid if storage was not modified + StoreOperation storeInStorage(Id _slot, Id _value, SourceLocation const& _location); + /// Retrieves the current value at the given slot in storage or creates a new special sload class. + Id loadFromStorage(Id _slot, SourceLocation const& _location); + /// Increments the sequence number, deletes all memory information that might be overwritten + /// and stores the new value at the given slot. + /// @returns the store operation, which might be invalid if memory was not modified + StoreOperation storeInMemory(Id _slot, Id _value, SourceLocation const& _location); + /// Retrieves the current value at the given slot in memory or creates a new special mload class. + Id loadFromMemory(Id _slot, SourceLocation const& _location); + /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory. + Id applySha3(Id _start, Id _length, SourceLocation const& _location); + + /// Current stack height, can be negative. + int m_stackHeight = 0; + /// Current stack layout, mapping stack height -> equivalence class + std::map<int, Id> m_stackElements; + /// Current sequence number, this is incremented with each modification to storage or memory. + unsigned m_sequenceNumber = 1; + /// Knowledge about storage content. + std::map<Id, Id> m_storageContent; + /// Knowledge about memory content. Keys are memory addresses, note that the values overlap + /// and are not contained here if they are not completely known. + std::map<Id, Id> m_memoryContent; + /// Keeps record of all sha3 hashes that are computed. + std::map<std::vector<Id>, Id> m_knownSha3Hashes; + /// Structure containing the classes of equivalent expressions. + std::shared_ptr<ExpressionClasses> m_expressionClasses; +}; + +} +} diff --git a/SemanticInformation.cpp b/SemanticInformation.cpp index 83d59efc..056162b5 100644 --- a/SemanticInformation.cpp +++ b/SemanticInformation.cpp @@ -122,3 +122,56 @@ bool SemanticInformation::altersControlFlow(AssemblyItem const& _item) return false; } } + + +bool SemanticInformation::isDeterministic(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return true; + + switch (_item.instruction()) + { + case Instruction::CALL: + case Instruction::CALLCODE: + case Instruction::CREATE: + case Instruction::GAS: + case Instruction::PC: + case Instruction::MSIZE: // depends on previous writes and reads, not only on content + case Instruction::BALANCE: // depends on previous calls + case Instruction::EXTCODESIZE: + return false; + default: + return true; + } +} + +bool SemanticInformation::invalidatesMemory(Instruction _instruction) +{ + switch (_instruction) + { + case Instruction::CALLDATACOPY: + case Instruction::CODECOPY: + case Instruction::EXTCODECOPY: + case Instruction::MSTORE: + case Instruction::MSTORE8: + case Instruction::CALL: + case Instruction::CALLCODE: + return true; + default: + return false; + } +} + +bool SemanticInformation::invalidatesStorage(Instruction _instruction) +{ + switch (_instruction) + { + case Instruction::CALL: + case Instruction::CALLCODE: + case Instruction::CREATE: + case Instruction::SSTORE: + return true; + default: + return false; + } +} diff --git a/SemanticInformation.h b/SemanticInformation.h index 27aa6f1a..094f4591 100644 --- a/SemanticInformation.h +++ b/SemanticInformation.h @@ -23,6 +23,7 @@ #pragma once +#include <libevmcore/Instruction.h> namespace dev { @@ -45,6 +46,13 @@ struct SemanticInformation static bool isSwapInstruction(AssemblyItem const& _item); static bool isJumpInstruction(AssemblyItem const& _item); static bool altersControlFlow(AssemblyItem const& _item); + /// @returns false if the value put on the stack by _item depends on anything else than + /// the information in the current block header, memory, storage or stack. + static bool isDeterministic(AssemblyItem const& _item); + /// @returns true if the given instruction modifies memory. + static bool invalidatesMemory(Instruction _instruction); + /// @returns true if the given instruction modifies storage (even indirectly). + static bool invalidatesStorage(Instruction _instruction); }; } |