From e543bd34c0b4884b5a27555f698f50af6a1c0b81 Mon Sep 17 00:00:00 2001 From: chriseth Date: Thu, 10 Nov 2016 18:16:21 +0100 Subject: Stored combined creation and runtime tags. Includes a change to Assembly to allow tags from sub-assemblies to be used. Sorry, this get a bit bigger than I thought. --- libevmasm/Assembly.cpp | 102 ++++++++++++++++++++++++---------------- libevmasm/Assembly.h | 6 ++- libevmasm/AssemblyItem.cpp | 23 +++++++++ libevmasm/AssemblyItem.h | 8 ++++ libevmasm/BlockDeduplicator.cpp | 37 +++++++++++---- libevmasm/BlockDeduplicator.h | 16 +++++++ libevmasm/ControlFlowGraph.h | 5 ++ 7 files changed, 145 insertions(+), 52 deletions(-) (limited to 'libevmasm') diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index e19b6b0d..23a8180b 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -308,9 +308,20 @@ void Assembly::injectStart(AssemblyItem const& _i) Assembly& Assembly::optimise(bool _enable, bool _isCreation, size_t _runs) { - if (!_enable) - return *this; + if (_enable) + optimiseInternal(_isCreation, _runs); + return *this; +} +map Assembly::optimiseInternal(bool _isCreation, size_t _runs) +{ + for (size_t subId = 0; subId < m_subs.size(); ++subId) + { + map subTagReplacements = m_subs[subId].optimiseInternal(false, _runs); + BlockDeduplicator::applyTagReplacement(m_items, subTagReplacements, subId); + } + + map tagReplacements; unsigned total = 0; for (unsigned count = 1; count > 0; total += count) { @@ -319,30 +330,39 @@ Assembly& Assembly::optimise(bool _enable, bool _isCreation, size_t _runs) // This only modifies PushTags, we have to run again to actually remove code. BlockDeduplicator dedup(m_items); if (dedup.deduplicate()) + { + tagReplacements.insert(dedup.replacedTags().begin(), dedup.replacedTags().end()); count++; + } { - // Control flow graph that resets knowledge at path joins. - ControlFlowGraph cfg(m_items, false); + // Control flow graph optimization has been here before but is disabled because it + // assumes we only jump to tags that are pushed. This is not the case anymore with + // function types that can be stored in storage. AssemblyItems optimisedItems; - for (BasicBlock const& block: cfg.optimisedBlocks()) + + auto iter = m_items.begin(); + while (iter != m_items.end()) { - // We used to start with the block's initial state but it caused - // too many inconsistencies. + auto end = iter; + while (end != m_items.end()) + if (SemanticInformation::altersControlFlow(*end++)) + break; + KnownState emptyState; CommonSubexpressionEliminator eliminator(emptyState); - auto iter = m_items.begin() + block.begin; - auto const end = m_items.begin() + block.end; - while (iter < end) + auto blockIter = iter; + auto const blockEnd = end; + while (blockIter < blockEnd) { - auto orig = iter; - iter = eliminator.feedItems(iter, end); + auto orig = blockIter; + blockIter = eliminator.feedItems(blockIter, blockEnd); bool shouldReplace = false; AssemblyItems optimisedChunk; try { optimisedChunk = eliminator.getOptimizedItems(); - shouldReplace = (optimisedChunk.size() < size_t(iter - orig)); + shouldReplace = (optimisedChunk.size() < size_t(blockIter - orig)); } catch (StackTooDeepException const&) { @@ -361,15 +381,11 @@ Assembly& Assembly::optimise(bool _enable, bool _isCreation, size_t _runs) optimisedItems += optimisedChunk; } else - copy(orig, iter, back_inserter(optimisedItems)); + copy(orig, blockIter, back_inserter(optimisedItems)); } + iter = end; } - if (optimisedItems.size() < m_items.size()) - { - m_items = move(optimisedItems); - count++; - } } } @@ -380,10 +396,7 @@ Assembly& Assembly::optimise(bool _enable, bool _isCreation, size_t _runs) m_items ); - for (auto& sub: m_subs) - sub.optimise(true, false, _runs); - - return *this; + return tagReplacements; } LinkerObject const& Assembly::assemble() const @@ -394,8 +407,8 @@ LinkerObject const& Assembly::assemble() const LinkerObject& ret = m_assembledObject; unsigned totalBytes = bytesRequired(); - vector tagPos(m_usedTags); - map tagRef; + m_tagPositionsInBytecode = vector(m_usedTags, -1); + map> tagRef; multimap dataRef; multimap subRef; vector sizeRef; ///< Pointers to code locations where the size of the program is inserted @@ -413,8 +426,8 @@ LinkerObject const& Assembly::assemble() const for (AssemblyItem const& i: m_items) { // store position of the invalid jump destination - if (i.type() != Tag && tagPos[0] == 0) - tagPos[0] = ret.bytecode.size(); + if (i.type() != Tag && m_tagPositionsInBytecode[0] == size_t(-1)) + m_tagPositionsInBytecode[0] = ret.bytecode.size(); switch (i.type()) { @@ -446,7 +459,7 @@ LinkerObject const& Assembly::assemble() const case PushTag: { ret.bytecode.push_back(tagPush); - tagRef[ret.bytecode.size()] = (unsigned)i.data(); + tagRef[ret.bytecode.size()] = i.splitForeignPushTag(); ret.bytecode.resize(ret.bytecode.size() + bytesPerTag); break; } @@ -484,26 +497,16 @@ LinkerObject const& Assembly::assemble() const ret.bytecode.resize(ret.bytecode.size() + 20); break; case Tag: - tagPos[(unsigned)i.data()] = ret.bytecode.size(); - assertThrow(ret.bytecode.size() < 0xffffffffL, AssemblyException, "Tag too large."); assertThrow(i.data() != 0, AssemblyException, ""); + assertThrow(i.splitForeignPushTag().first == size_t(-1), AssemblyException, "Foreign tag."); + assertThrow(ret.bytecode.size() < 0xffffffffL, AssemblyException, "Tag too large."); + m_tagPositionsInBytecode[size_t(i.data())] = ret.bytecode.size(); ret.bytecode.push_back((byte)Instruction::JUMPDEST); break; default: BOOST_THROW_EXCEPTION(InvalidOpcode()); } } - for (auto const& i: tagRef) - { - bytesRef r(ret.bytecode.data() + i.first, bytesPerTag); - auto tag = i.second; - if (tag >= tagPos.size()) - tag = 0; - if (tag == 0) - assertThrow(tagPos[tag] != 0, AssemblyException, ""); - - toBigEndian(tagPos[tag], r); - } if (!dataRef.empty() && !subRef.empty()) ret.bytecode.push_back(0); @@ -519,6 +522,23 @@ LinkerObject const& Assembly::assemble() const } ret.append(m_subs[i].assemble()); } + for (auto const& i: tagRef) + { + size_t subId; + size_t tagId; + tie(subId, tagId) = i.second; + assertThrow(subId == size_t(-1) || subId < m_subs.size(), AssemblyException, "Invalid sub id"); + std::vector const& tagPositions = + subId == size_t(-1) ? + m_tagPositionsInBytecode : + m_subs[subId].m_tagPositionsInBytecode; + assertThrow(tagId < tagPositions.size(), AssemblyException, "Reference to non-existing tag."); + size_t pos = tagPositions[tagId]; + assertThrow(pos != size_t(-1), AssemblyException, "Reference to tag without position."); + assertThrow(dev::bytesRequired(pos) <= bytesPerTag, AssemblyException, "Tag too large for reserved space."); + bytesRef r(ret.bytecode.data() + i.first, bytesPerTag); + toBigEndian(pos, r); + } for (auto const& dataItem: m_data) { auto references = dataRef.equal_range(dataItem.first); diff --git a/libevmasm/Assembly.h b/libevmasm/Assembly.h index dae1e1da..0ae81e1d 100644 --- a/libevmasm/Assembly.h +++ b/libevmasm/Assembly.h @@ -53,7 +53,6 @@ public: AssemblyItem newPushSubSize(u256 const& _subId) { return AssemblyItem(PushSubSize, _subId); } AssemblyItem newPushLibraryAddress(std::string const& _identifier); - AssemblyItem append() { return append(newTag()); } void append(Assembly const& _a); void append(Assembly const& _a, int _deposit); AssemblyItem const& append(AssemblyItem const& _i); @@ -110,6 +109,10 @@ public: ) const; protected: + /// Does the same operations as @a optimise, but should only be applied to a sub and + /// returns the replaced tags. + std::map optimiseInternal(bool _isCreation, size_t _runs); + std::string locationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const; void donePath() { if (m_totalDeposit != INT_MAX && m_totalDeposit != m_deposit) BOOST_THROW_EXCEPTION(InvalidDeposit()); } unsigned bytesRequired() const; @@ -129,6 +132,7 @@ protected: std::map m_libraries; ///< Identifiers of libraries to be linked. mutable LinkerObject m_assembledObject; + mutable std::vector m_tagPositionsInBytecode; int m_deposit = 0; int m_baseDeposit = 0; diff --git a/libevmasm/AssemblyItem.cpp b/libevmasm/AssemblyItem.cpp index 599ded85..2eae20af 100644 --- a/libevmasm/AssemblyItem.cpp +++ b/libevmasm/AssemblyItem.cpp @@ -26,6 +26,29 @@ using namespace std; using namespace dev; using namespace dev::eth; +AssemblyItem AssemblyItem::toSubAssemblyTag(size_t _subId) const +{ + assertThrow(m_data < (u256(1) << 64), Exception, "Tag already has subassembly set."); + + assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); + AssemblyItem r = *this; + r.m_type = PushTag; + r.setPushTagSubIdAndTag(_subId, size_t(m_data)); + return r; +} + +pair AssemblyItem::splitForeignPushTag() const +{ + assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); + return make_pair(size_t(m_data / (u256(1) << 64)) - 1, size_t(m_data)); +} + +void AssemblyItem::setPushTagSubIdAndTag(size_t _subId, size_t _tag) +{ + assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); + setData(_tag + ((u256(_subId) + 1) << 64)); +} + unsigned AssemblyItem::bytesRequired(unsigned _addressLength) const { switch (m_type) diff --git a/libevmasm/AssemblyItem.h b/libevmasm/AssemblyItem.h index 1c3d9789..1a2fb1e6 100644 --- a/libevmasm/AssemblyItem.h +++ b/libevmasm/AssemblyItem.h @@ -69,6 +69,14 @@ public: AssemblyItem tag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(Tag, m_data); } AssemblyItem pushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(PushTag, m_data); } + /// Converts the tag to a subassembly tag. This has to be called in order to move a tag across assemblies. + /// @param _subId the identifier of the subassembly the tag is taken from. + AssemblyItem toSubAssemblyTag(size_t _subId) const; + /// @returns splits the data of the push tag into sub assembly id and actual tag id. + /// The sub assembly id of non-foreign push tags is -1. + std::pair splitForeignPushTag() const; + /// Sets sub-assembly part and tag for a push tag. + void setPushTagSubIdAndTag(size_t _subId, size_t _tag); AssemblyItemType type() const { return m_type; } u256 const& data() const { return m_data; } diff --git a/libevmasm/BlockDeduplicator.cpp b/libevmasm/BlockDeduplicator.cpp index 3bb7a797..18b595cd 100644 --- a/libevmasm/BlockDeduplicator.cpp +++ b/libevmasm/BlockDeduplicator.cpp @@ -77,7 +77,6 @@ bool BlockDeduplicator::deduplicate() { //@todo this should probably be optimized. set> blocksSeen(comparator); - map tagReplacement; for (size_t i = 0; i < m_items.size(); ++i) { if (m_items.at(i).type() != Tag) @@ -86,22 +85,40 @@ bool BlockDeduplicator::deduplicate() if (it == blocksSeen.end()) blocksSeen.insert(i); else - tagReplacement[m_items.at(i).data()] = m_items.at(*it).data(); + m_replacedTags[m_items.at(i).data()] = m_items.at(*it).data(); } - bool changed = false; - for (AssemblyItem& item: m_items) - if (item.type() == PushTag && tagReplacement.count(item.data())) - { - changed = true; - item.setData(tagReplacement.at(item.data())); - } - if (!changed) + if (!applyTagReplacement(m_items, m_replacedTags)) break; } return iterations > 0; } +bool BlockDeduplicator::applyTagReplacement( + AssemblyItems& _items, + map const& _replacements, + size_t _subId +) +{ + bool changed = false; + for (AssemblyItem& item: _items) + if (item.type() == PushTag) + { + size_t subId; + size_t tagId; + tie(subId, tagId) = item.splitForeignPushTag(); + if (subId != _subId) + continue; + auto it = _replacements.find(tagId); + if (it != _replacements.end()) + { + changed = true; + item.setPushTagSubIdAndTag(subId, size_t(it->second)); + } + } + return changed; +} + BlockDeduplicator::BlockIterator& BlockDeduplicator::BlockIterator::operator++() { if (it == end) diff --git a/libevmasm/BlockDeduplicator.h b/libevmasm/BlockDeduplicator.h index c48835fd..5ecab7f3 100644 --- a/libevmasm/BlockDeduplicator.h +++ b/libevmasm/BlockDeduplicator.h @@ -23,9 +23,12 @@ #pragma once +#include + #include #include #include +#include namespace dev { @@ -45,6 +48,18 @@ public: BlockDeduplicator(AssemblyItems& _items): m_items(_items) {} /// @returns true if something was changed bool deduplicate(); + /// @returns the tags that were replaced. + std::map const& replacedTags() const { return m_replacedTags; } + + /// Replaces all PushTag operations insied @a _items that match a key in + /// @a _replacements by the respective value. If @a _subID is not -1, only + /// apply the replacement for foreign tags from this sub id. + /// @returns true iff a replacement was performed. + static bool applyTagReplacement( + AssemblyItems& _items, + std::map const& _replacements, + size_t _subID = size_t(-1) + ); private: /// Iterator that skips tags and skips to the end if (all branches of) the control @@ -70,6 +85,7 @@ private: AssemblyItem const* replaceWith; }; + std::map m_replacedTags; AssemblyItems& m_items; }; diff --git a/libevmasm/ControlFlowGraph.h b/libevmasm/ControlFlowGraph.h index 03a1f717..78998262 100644 --- a/libevmasm/ControlFlowGraph.h +++ b/libevmasm/ControlFlowGraph.h @@ -89,6 +89,11 @@ struct BasicBlock using BasicBlocks = std::vector; +/** + * Control flow graph optimizer. + * ASSUMES THAT WE ONLY JUMP TO TAGS THAT WERE PREVIOUSLY PUSHED. THIS IS NOT TRUE ANYMORE + * NOW THAT FUNCTION TAGS CAN BE STORED IN STORAGE. + */ class ControlFlowGraph { public: -- cgit