diff options
-rw-r--r-- | Assembly.cpp | 485 | ||||
-rw-r--r-- | Assembly.h | 132 | ||||
-rw-r--r-- | AssemblyItem.cpp | 135 | ||||
-rw-r--r-- | AssemblyItem.h | 100 | ||||
-rw-r--r-- | CMakeLists.txt | 33 | ||||
-rw-r--r-- | CommonSubexpressionEliminator.cpp | 672 | ||||
-rw-r--r-- | CommonSubexpressionEliminator.h | 233 | ||||
-rw-r--r-- | ControlFlowGraph.cpp | 260 | ||||
-rw-r--r-- | ControlFlowGraph.h | 108 | ||||
-rw-r--r-- | Exceptions.h | 36 | ||||
-rw-r--r-- | ExpressionClasses.cpp | 438 | ||||
-rw-r--r-- | ExpressionClasses.h | 181 | ||||
-rw-r--r-- | SemanticInformation.cpp | 124 | ||||
-rw-r--r-- | SemanticInformation.h | 51 | ||||
-rw-r--r-- | SourceLocation.h | 89 |
15 files changed, 3077 insertions, 0 deletions
diff --git a/Assembly.cpp b/Assembly.cpp new file mode 100644 index 00000000..6cc09a4b --- /dev/null +++ b/Assembly.cpp @@ -0,0 +1,485 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Assembly.cpp + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#include "Assembly.h" +#include <fstream> +#include <libdevcore/Log.h> +#include <libevmasm/CommonSubexpressionEliminator.h> +#include <libevmasm/ControlFlowGraph.h> +#include <json/json.h> +using namespace std; +using namespace dev; +using namespace dev::eth; + +void Assembly::append(Assembly const& _a) +{ + auto newDeposit = m_deposit + _a.deposit(); + for (AssemblyItem i: _a.m_items) + { + if (i.type() == Tag || i.type() == PushTag) + i.setData(i.data() + m_usedTags); + else if (i.type() == PushSub || i.type() == PushSubSize) + i.setData(i.data() + m_usedTags); + append(i); + } + m_deposit = newDeposit; + m_usedTags += _a.m_usedTags; + for (auto const& i: _a.m_data) + m_data.insert(i); + for (auto const& i: _a.m_strings) + m_strings.insert(i); + for (auto const& i: _a.m_subs) + m_subs.push_back(i); + + assert(!_a.m_baseDeposit); + assert(!_a.m_totalDeposit); +} + +void Assembly::append(Assembly const& _a, int _deposit) +{ + if (_deposit > _a.m_deposit) + BOOST_THROW_EXCEPTION(InvalidDeposit()); + else + { + append(_a); + while (_deposit++ < _a.m_deposit) + append(Instruction::POP); + } +} + +string Assembly::out() const +{ + stringstream ret; + stream(ret); + return ret.str(); +} + +unsigned Assembly::bytesRequired() const +{ + for (unsigned br = 1;; ++br) + { + unsigned ret = 1; + for (auto const& i: m_data) + ret += i.second.size(); + + for (AssemblyItem const& i: m_items) + ret += i.bytesRequired(br); + if (dev::bytesRequired(ret) <= br) + return ret; + } +} + +string Assembly::getLocationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const +{ + if (_location.isEmpty() || _sourceCodes.empty() || _location.start >= _location.end || _location.start < 0) + return ""; + + auto it = _sourceCodes.find(*_location.sourceName); + if (it == _sourceCodes.end()) + return ""; + + string const& source = it->second; + if (size_t(_location.start) >= source.size()) + return ""; + + string cut = source.substr(_location.start, _location.end - _location.start); + auto newLinePos = cut.find_first_of("\n"); + if (newLinePos != string::npos) + cut = cut.substr(0, newLinePos) + "..."; + + return move(cut); +} + +ostream& Assembly::streamAsm(ostream& _out, string const& _prefix, StringMap const& _sourceCodes) const +{ + _out << _prefix << ".code:" << endl; + for (AssemblyItem const& i: m_items) + { + _out << _prefix; + switch (i.type()) + { + case Operation: + _out << " " << instructionInfo(i.instruction()).name << "\t" << i.getJumpTypeAsString(); + break; + case Push: + _out << " PUSH " << i.data(); + break; + case PushString: + _out << " PUSH \"" << m_strings.at((h256)i.data()) << "\""; + break; + case PushTag: + _out << " PUSH [tag" << i.data() << "]"; + break; + case PushSub: + _out << " PUSH [$" << h256(i.data()).abridged() << "]"; + break; + case PushSubSize: + _out << " PUSH #[$" << h256(i.data()).abridged() << "]"; + break; + case PushProgramSize: + _out << " PUSHSIZE"; + break; + case Tag: + _out << "tag" << i.data() << ": " << endl << _prefix << " JUMPDEST"; + break; + case PushData: + _out << " PUSH [" << hex << (unsigned)i.data() << "]"; + break; + default: + BOOST_THROW_EXCEPTION(InvalidOpcode()); + } + _out << "\t\t" << getLocationFromSources(_sourceCodes, i.getLocation()) << endl; + } + + if (!m_data.empty() || !m_subs.empty()) + { + _out << _prefix << ".data:" << endl; + for (auto const& i: m_data) + if (u256(i.first) >= m_subs.size()) + _out << _prefix << " " << hex << (unsigned)(u256)i.first << ": " << toHex(i.second) << endl; + for (size_t i = 0; i < m_subs.size(); ++i) + { + _out << _prefix << " " << hex << i << ": " << endl; + m_subs[i].stream(_out, _prefix + " ", _sourceCodes); + } + } + return _out; +} + +Json::Value Assembly::createJsonValue(string _name, int _begin, int _end, string _value, string _jumpType) const +{ + Json::Value value; + value["name"] = _name; + value["begin"] = _begin; + value["end"] = _end; + if (!_value.empty()) + value["value"] = _value; + if (!_jumpType.empty()) + value["jumpType"] = _jumpType; + return value; +} + +string toStringInHex(u256 _value) +{ + std::stringstream hexStr; + hexStr << hex << _value; + return hexStr.str(); +} + +Json::Value Assembly::streamAsmJson(ostream& _out, StringMap const& _sourceCodes) const +{ + Json::Value root; + + Json::Value collection(Json::arrayValue); + for (AssemblyItem const& i: m_items) + { + switch (i.type()) + { + case Operation: + collection.append( + createJsonValue(instructionInfo(i.instruction()).name, i.getLocation().start, i.getLocation().end, i.getJumpTypeAsString())); + break; + case Push: + collection.append( + createJsonValue("PUSH", i.getLocation().start, i.getLocation().end, toStringInHex(i.data()), i.getJumpTypeAsString())); + break; + case PushString: + collection.append( + createJsonValue("PUSH tag", i.getLocation().start, i.getLocation().end, m_strings.at((h256)i.data()))); + break; + case PushTag: + collection.append( + createJsonValue("PUSH [tag]", i.getLocation().start, i.getLocation().end, toStringInHex(i.data()))); + break; + case PushSub: + collection.append( + createJsonValue("PUSH [$]", i.getLocation().start, i.getLocation().end, dev::toString(h256(i.data())))); + break; + case PushSubSize: + collection.append( + createJsonValue("PUSH #[$]", i.getLocation().start, i.getLocation().end, dev::toString(h256(i.data())))); + break; + case PushProgramSize: + collection.append( + createJsonValue("PUSHSIZE", i.getLocation().start, i.getLocation().end)); + break; + case Tag: + { + collection.append( + createJsonValue("tag", i.getLocation().start, i.getLocation().end, string(i.data()))); + collection.append( + createJsonValue("JUMDEST", i.getLocation().start, i.getLocation().end)); + } + break; + case PushData: + { + Json::Value pushData; + pushData["name"] = "PUSH hex"; + collection.append(createJsonValue("PUSH hex", i.getLocation().start, i.getLocation().end, toStringInHex(i.data()))); + } + break; + default: + BOOST_THROW_EXCEPTION(InvalidOpcode()); + } + } + + root[".code"] = collection; + + if (!m_data.empty() || !m_subs.empty()) + { + Json::Value data; + for (auto const& i: m_data) + if (u256(i.first) >= m_subs.size()) + data[toStringInHex((u256)i.first)] = toHex(i.second); + + for (size_t i = 0; i < m_subs.size(); ++i) + { + std::stringstream hexStr; + hexStr << hex << i; + data[hexStr.str()] = m_subs[i].stream(_out, "", _sourceCodes, true); + } + root[".data"] = data; + _out << root; + } + return root; +} + +Json::Value Assembly::stream(ostream& _out, string const& _prefix, StringMap const& _sourceCodes, bool _inJsonFormat) const +{ + if (_inJsonFormat) + return streamAsmJson(_out, _sourceCodes); + else + { + streamAsm(_out, _prefix, _sourceCodes); + return Json::Value(); + } +} + +AssemblyItem const& Assembly::append(AssemblyItem const& _i) +{ + m_deposit += _i.deposit(); + m_items.push_back(_i); + if (m_items.back().getLocation().isEmpty() && !m_currentSourceLocation.isEmpty()) + m_items.back().setLocation(m_currentSourceLocation); + return back(); +} + +void Assembly::injectStart(AssemblyItem const& _i) +{ + m_items.insert(m_items.begin(), _i); +} + +inline bool matches(AssemblyItemsConstRef _a, AssemblyItemsConstRef _b) +{ + if (_a.size() != _b.size()) + return false; + for (unsigned i = 0; i < _a.size(); ++i) + if (!_a[i].match(_b[i])) + return false; + return true; +} + +struct OptimiserChannel: public LogChannel { static const char* name() { return "OPT"; } static const int verbosity = 12; }; +#define copt dev::LogOutputStream<OptimiserChannel, true>() + +Assembly& Assembly::optimise(bool _enable) +{ + if (!_enable) + return *this; + std::vector<pair<AssemblyItems, function<AssemblyItems(AssemblyItemsConstRef)>>> rules; + // jump to next instruction + rules.push_back({ { PushTag, Instruction::JUMP, Tag }, [](AssemblyItemsConstRef m) -> AssemblyItems { if (m[0].data() == m[2].data()) return {m[2]}; else return m.toVector(); }}); + + unsigned total = 0; + for (unsigned count = 1; count > 0; total += count) + { + copt << toString(*this); + count = 0; + + copt << "Performing control flow analysis..."; + { + ControlFlowGraph cfg(m_items); + AssemblyItems optItems = cfg.optimisedItems(); + if (optItems.size() < m_items.size()) + { + copt << "Old size: " << m_items.size() << ", new size: " << optItems.size(); + m_items = move(optItems); + count++; + } + } + + copt << "Performing common subexpression elimination..."; + for (auto iter = m_items.begin(); iter != m_items.end();) + { + CommonSubexpressionEliminator eliminator; + auto orig = iter; + iter = eliminator.feedItems(iter, m_items.end()); + AssemblyItems optItems; + bool shouldReplace = false; + try + { + optItems = eliminator.getOptimizedItems(); + shouldReplace = (optItems.size() < size_t(iter - orig)); + } + catch (StackTooDeepException const&) + { + // This might happen if the opcode reconstruction is not as efficient + // as the hand-crafted code. + } + + if (shouldReplace) + { + copt << "Old size: " << (iter - orig) << ", new size: " << optItems.size(); + count++; + for (auto moveIter = optItems.begin(); moveIter != optItems.end(); ++orig, ++moveIter) + *orig = move(*moveIter); + iter = m_items.erase(orig, iter); + } + } + } + + copt << total << " optimisations done."; + + for (auto& sub: m_subs) + sub.optimise(true); + + return *this; +} + +bytes Assembly::assemble() const +{ + bytes ret; + + unsigned totalBytes = bytesRequired(); + vector<unsigned> tagPos(m_usedTags); + map<unsigned, unsigned> tagRef; + multimap<h256, unsigned> dataRef; + vector<unsigned> sizeRef; ///< Pointers to code locations where the size of the program is inserted + unsigned bytesPerTag = dev::bytesRequired(totalBytes); + byte tagPush = (byte)Instruction::PUSH1 - 1 + bytesPerTag; + + for (size_t i = 0; i < m_subs.size(); ++i) + m_data[u256(i)] = m_subs[i].assemble(); + + unsigned bytesRequiredIncludingData = bytesRequired(); + unsigned bytesPerDataRef = dev::bytesRequired(bytesRequiredIncludingData); + byte dataRefPush = (byte)Instruction::PUSH1 - 1 + bytesPerDataRef; + ret.reserve(bytesRequiredIncludingData); + // m_data must not change from here on + + for (AssemblyItem const& i: m_items) + switch (i.type()) + { + case Operation: + ret.push_back((byte)i.data()); + break; + case PushString: + { + ret.push_back((byte)Instruction::PUSH32); + unsigned ii = 0; + for (auto j: m_strings.at((h256)i.data())) + if (++ii > 32) + break; + else + ret.push_back((byte)j); + while (ii++ < 32) + ret.push_back(0); + break; + } + case Push: + { + byte b = max<unsigned>(1, dev::bytesRequired(i.data())); + ret.push_back((byte)Instruction::PUSH1 - 1 + b); + ret.resize(ret.size() + b); + bytesRef byr(&ret.back() + 1 - b, b); + toBigEndian(i.data(), byr); + break; + } + case PushTag: + { + ret.push_back(tagPush); + tagRef[ret.size()] = (unsigned)i.data(); + ret.resize(ret.size() + bytesPerTag); + break; + } + case PushData: case PushSub: + { + ret.push_back(dataRefPush); + dataRef.insert(make_pair((h256)i.data(), ret.size())); + ret.resize(ret.size() + bytesPerDataRef); + break; + } + case PushSubSize: + { + auto s = m_data[i.data()].size(); + byte b = max<unsigned>(1, dev::bytesRequired(s)); + ret.push_back((byte)Instruction::PUSH1 - 1 + b); + ret.resize(ret.size() + b); + bytesRef byr(&ret.back() + 1 - b, b); + toBigEndian(s, byr); + break; + } + case PushProgramSize: + { + ret.push_back(dataRefPush); + sizeRef.push_back(ret.size()); + ret.resize(ret.size() + bytesPerDataRef); + break; + } + case Tag: + tagPos[(unsigned)i.data()] = ret.size(); + ret.push_back((byte)Instruction::JUMPDEST); + break; + default: + BOOST_THROW_EXCEPTION(InvalidOpcode()); + } + + for (auto const& i: tagRef) + { + bytesRef r(ret.data() + i.first, bytesPerTag); + toBigEndian(tagPos[i.second], r); + } + + if (!m_data.empty()) + { + ret.push_back(0); + for (auto const& i: m_data) + { + auto its = dataRef.equal_range(i.first); + if (its.first != its.second) + { + for (auto it = its.first; it != its.second; ++it) + { + bytesRef r(ret.data() + it->second, bytesPerDataRef); + toBigEndian(ret.size(), r); + } + for (auto b: i.second) + ret.push_back(b); + } + } + } + for (unsigned pos: sizeRef) + { + bytesRef r(ret.data() + pos, bytesPerDataRef); + toBigEndian(ret.size(), r); + } + return ret; +} diff --git a/Assembly.h b/Assembly.h new file mode 100644 index 00000000..b4850f7d --- /dev/null +++ b/Assembly.h @@ -0,0 +1,132 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Assembly.h + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#pragma once + +#include <iostream> +#include <sstream> +#include <libdevcore/Common.h> +#include <libdevcore/Assertions.h> +#include <libevmcore/Instruction.h> +#include <libevmasm/SourceLocation.h> +#include <libevmasm/AssemblyItem.h> +#include "Exceptions.h" +#include <json/json.h> + +namespace Json +{ +class Value; +} +namespace dev +{ +namespace eth +{ + +class Assembly +{ +public: + Assembly() {} + + AssemblyItem newTag() { return AssemblyItem(Tag, m_usedTags++); } + AssemblyItem newPushTag() { return AssemblyItem(PushTag, m_usedTags++); } + AssemblyItem newData(bytes const& _data) { h256 h = (u256)std::hash<std::string>()(asString(_data)); m_data[h] = _data; return AssemblyItem(PushData, h); } + AssemblyItem newSub(Assembly const& _sub) { m_subs.push_back(_sub); return AssemblyItem(PushSub, m_subs.size() - 1); } + AssemblyItem newPushString(std::string const& _data) { h256 h = (u256)std::hash<std::string>()(_data); m_strings[h] = _data; return AssemblyItem(PushString, h); } + AssemblyItem newPushSubSize(u256 const& _subId) { return AssemblyItem(PushSubSize, _subId); } + + AssemblyItem append() { return append(newTag()); } + void append(Assembly const& _a); + void append(Assembly const& _a, int _deposit); + AssemblyItem const& append(AssemblyItem const& _i); + AssemblyItem const& append(std::string const& _data) { return append(newPushString(_data)); } + AssemblyItem const& append(bytes const& _data) { return append(newData(_data)); } + AssemblyItem appendSubSize(Assembly const& _a) { auto ret = newSub(_a); append(newPushSubSize(ret.data())); return ret; } + /// Pushes the final size of the current assembly itself. Use this when the code is modified + /// after compilation and CODESIZE is not an option. + void appendProgramSize() { append(AssemblyItem(PushProgramSize)); } + + AssemblyItem appendJump() { auto ret = append(newPushTag()); append(Instruction::JUMP); return ret; } + AssemblyItem appendJumpI() { auto ret = append(newPushTag()); append(Instruction::JUMPI); return ret; } + AssemblyItem appendJump(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); append(Instruction::JUMP); return ret; } + AssemblyItem appendJumpI(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); append(Instruction::JUMPI); return ret; } + template <class T> Assembly& operator<<(T const& _d) { append(_d); return *this; } + AssemblyItems const& getItems() const { return m_items; } + AssemblyItem const& back() const { return m_items.back(); } + std::string backString() const { return m_items.size() && m_items.back().type() == PushString ? m_strings.at((h256)m_items.back().data()) : std::string(); } + + void onePath() { if (asserts(!m_totalDeposit && !m_baseDeposit)) BOOST_THROW_EXCEPTION(InvalidDeposit()); m_baseDeposit = m_deposit; m_totalDeposit = INT_MAX; } + void otherPath() { donePath(); m_totalDeposit = m_deposit; m_deposit = m_baseDeposit; } + void donePaths() { donePath(); m_totalDeposit = m_baseDeposit = 0; } + void ignored() { m_baseDeposit = m_deposit; } + void endIgnored() { m_deposit = m_baseDeposit; m_baseDeposit = 0; } + + void popTo(int _deposit) { while (m_deposit > _deposit) append(Instruction::POP); } + + void injectStart(AssemblyItem const& _i); + std::string out() const; + int deposit() const { return m_deposit; } + void adjustDeposit(int _adjustment) { m_deposit += _adjustment; if (asserts(m_deposit >= 0)) BOOST_THROW_EXCEPTION(InvalidDeposit()); } + void setDeposit(int _deposit) { m_deposit = _deposit; if (asserts(m_deposit >= 0)) BOOST_THROW_EXCEPTION(InvalidDeposit()); } + + /// Changes the source location used for each appended item. + void setSourceLocation(SourceLocation const& _location) { m_currentSourceLocation = _location; } + + bytes assemble() const; + Assembly& optimise(bool _enable); + Json::Value stream( + std::ostream& _out, + std::string const& _prefix = "", + const StringMap &_sourceCodes = StringMap(), + bool _inJsonFormat = false + ) const; + +protected: + std::string getLocationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const; + void donePath() { if (m_totalDeposit != INT_MAX && m_totalDeposit != m_deposit) BOOST_THROW_EXCEPTION(InvalidDeposit()); } + unsigned bytesRequired() const; + +private: + Json::Value streamAsmJson(std::ostream& _out, const StringMap &_sourceCodes) const; + std::ostream& streamAsm(std::ostream& _out, std::string const& _prefix, StringMap const& _sourceCodes) const; + Json::Value createJsonValue(std::string _name, int _begin, int _end, std::string _value = std::string(), std::string _jumpType = std::string()) const; + +protected: + unsigned m_usedTags = 0; + AssemblyItems m_items; + mutable std::map<h256, bytes> m_data; + std::vector<Assembly> m_subs; + std::map<h256, std::string> m_strings; + + int m_deposit = 0; + int m_baseDeposit = 0; + int m_totalDeposit = 0; + + SourceLocation m_currentSourceLocation; +}; + +inline std::ostream& operator<<(std::ostream& _out, Assembly const& _a) +{ + _a.stream(_out); + return _out; +} + +} +} diff --git a/AssemblyItem.cpp b/AssemblyItem.cpp new file mode 100644 index 00000000..a4485a14 --- /dev/null +++ b/AssemblyItem.cpp @@ -0,0 +1,135 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Assembly.cpp + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#include "AssemblyItem.h" +#include <fstream> + +using namespace std; +using namespace dev; +using namespace dev::eth; + +unsigned AssemblyItem::bytesRequired(unsigned _addressLength) const +{ + switch (m_type) + { + case Operation: + case Tag: // 1 byte for the JUMPDEST + return 1; + case PushString: + return 33; + case Push: + return 1 + max<unsigned>(1, dev::bytesRequired(m_data)); + case PushSubSize: + case PushProgramSize: + return 4; // worst case: a 16MB program + case PushTag: + case PushData: + case PushSub: + return 1 + _addressLength; + default: + break; + } + BOOST_THROW_EXCEPTION(InvalidOpcode()); +} + +int AssemblyItem::deposit() const +{ + switch (m_type) + { + case Operation: + return instructionInfo(instruction()).ret - instructionInfo(instruction()).args; + case Push: + case PushString: + case PushTag: + case PushData: + case PushSub: + case PushSubSize: + case PushProgramSize: + return 1; + case Tag: + return 0; + default:; + } + return 0; +} + +string AssemblyItem::getJumpTypeAsString() const +{ + switch (m_jumpType) + { + case JumpType::IntoFunction: + return "[in]"; + case JumpType::OutOfFunction: + return "[out]"; + case JumpType::Ordinary: + default: + return ""; + } +} + +ostream& dev::eth::operator<<(ostream& _out, AssemblyItem const& _item) +{ + switch (_item.type()) + { + case Operation: + _out << " " << instructionInfo(_item.instruction()).name; + if (_item.instruction() == eth::Instruction::JUMP || _item.instruction() == eth::Instruction::JUMPI) + _out << "\t" << _item.getJumpTypeAsString(); + break; + case Push: + _out << " PUSH " << hex << _item.data(); + break; + case PushString: + _out << " PushString" << hex << (unsigned)_item.data(); + break; + case PushTag: + _out << " PushTag " << _item.data(); + break; + case Tag: + _out << " Tag " << _item.data(); + break; + case PushData: + _out << " PushData " << hex << (unsigned)_item.data(); + break; + case PushSub: + _out << " PushSub " << hex << h256(_item.data()).abridged(); + break; + case PushSubSize: + _out << " PushSubSize " << hex << h256(_item.data()).abridged(); + break; + case PushProgramSize: + _out << " PushProgramSize"; + break; + case UndefinedItem: + _out << " ???"; + break; + default: + BOOST_THROW_EXCEPTION(InvalidOpcode()); + } + return _out; +} + +ostream& dev::eth::operator<<(ostream& _out, AssemblyItemsConstRef _i) +{ + for (AssemblyItem const& i: _i) + _out << i; + return _out; +} diff --git a/AssemblyItem.h b/AssemblyItem.h new file mode 100644 index 00000000..6f2a65de --- /dev/null +++ b/AssemblyItem.h @@ -0,0 +1,100 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Assembly.h + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#pragma once + +#include <iostream> +#include <sstream> +#include <libdevcore/Common.h> +#include <libdevcore/Assertions.h> +#include <libevmcore/Instruction.h> +#include <libevmasm/SourceLocation.h> +#include "Exceptions.h" + +namespace dev +{ +namespace eth +{ + +enum AssemblyItemType { UndefinedItem, Operation, Push, PushString, PushTag, PushSub, PushSubSize, PushProgramSize, Tag, PushData }; + +class Assembly; + +class AssemblyItem +{ +public: + enum class JumpType { Ordinary, IntoFunction, OutOfFunction }; + + AssemblyItem(u256 _push, SourceLocation const& _location = SourceLocation()): + AssemblyItem(Push, _push, _location) { } + AssemblyItem(Instruction _i, SourceLocation const& _location = SourceLocation()): + AssemblyItem(Operation, byte(_i), _location) { } + AssemblyItem(AssemblyItemType _type, u256 _data = 0, SourceLocation const& _location = SourceLocation()): + m_type(_type), + m_data(_data), + m_location(_location) + { + } + + AssemblyItem tag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(Tag, m_data); } + AssemblyItem pushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(PushTag, m_data); } + + AssemblyItemType type() const { return m_type; } + u256 const& data() const { return m_data; } + void setType(AssemblyItemType const _type) { m_type = _type; } + void setData(u256 const& _data) { m_data = _data; } + + /// @returns the instruction of this item (only valid if type() == Operation) + Instruction instruction() const { return Instruction(byte(m_data)); } + + /// @returns true iff the type and data of the items are equal. + bool operator==(AssemblyItem const& _other) const { return m_type == _other.m_type && m_data == _other.m_data; } + bool operator!=(AssemblyItem const& _other) const { return !operator==(_other); } + + /// @returns an upper bound for the number of bytes required by this item, assuming that + /// the value of a jump tag takes @a _addressLength bytes. + unsigned bytesRequired(unsigned _addressLength) const; + int deposit() const; + + bool match(AssemblyItem const& _i) const { return _i.m_type == UndefinedItem || (m_type == _i.m_type && (m_type != Operation || m_data == _i.m_data)); } + void setLocation(SourceLocation const& _location) { m_location = _location; } + SourceLocation const& getLocation() const { return m_location; } + + void setJumpType(JumpType _jumpType) { m_jumpType = _jumpType; } + JumpType getJumpType() const { return m_jumpType; } + std::string getJumpTypeAsString() const; + +private: + AssemblyItemType m_type; + u256 m_data; + SourceLocation m_location; + JumpType m_jumpType = JumpType::Ordinary; +}; + +using AssemblyItems = std::vector<AssemblyItem>; +using AssemblyItemsConstRef = vector_ref<AssemblyItem const>; + +std::ostream& operator<<(std::ostream& _out, AssemblyItem const& _item); +std::ostream& operator<<(std::ostream& _out, AssemblyItemsConstRef _i); +inline std::ostream& operator<<(std::ostream& _out, AssemblyItems const& _i) { return operator<<(_out, AssemblyItemsConstRef(&_i)); } + +} +} diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..f8150806 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,33 @@ +cmake_policy(SET CMP0015 NEW) +# this policy was introduced in cmake 3.0 +# remove if, once 3.0 will be used on unix +if (${CMAKE_MAJOR_VERSION} GREATER 2) + # old policy do not use MACOSX_RPATH + cmake_policy(SET CMP0042 OLD) +endif() +set(CMAKE_AUTOMOC OFF) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATICLIB") + +aux_source_directory(. SRC_LIST) + +include_directories(BEFORE ${JSONCPP_INCLUDE_DIRS}) +include_directories(BEFORE ..) +include_directories(${Boost_INCLUDE_DIRS}) + +set(EXECUTABLE evmasm) + +file(GLOB HEADERS "*.h") + +if (ETH_STATIC) + add_library(${EXECUTABLE} STATIC ${SRC_LIST} ${HEADERS}) +else() + add_library(${EXECUTABLE} SHARED ${SRC_LIST} ${HEADERS}) +endif() + +target_link_libraries(${EXECUTABLE} evmcore) +target_link_libraries(${EXECUTABLE} devcrypto) + +install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib ) +install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} ) + diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp new file mode 100644 index 00000000..63524d6f --- /dev/null +++ b/CommonSubexpressionEliminator.cpp @@ -0,0 +1,672 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file CommonSubexpressionEliminator.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + * Optimizer step for common subexpression elimination and stack reorganisation. + */ + +#include <functional> +#include <boost/range/adaptor/reversed.hpp> +#include <libdevcrypto/SHA3.h> +#include <libevmasm/CommonSubexpressionEliminator.h> +#include <libevmasm/AssemblyItem.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + +vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems() +{ + optimizeBreakingItem(); + + map<int, Id> initialStackContents; + map<int, Id> targetStackContents; + int minHeight = m_stackHeight + 1; + if (!m_stackElements.empty()) + minHeight = min(minHeight, m_stackElements.begin()->first); + for (int height = minHeight; height <= 0; ++height) + initialStackContents[height] = initialStackElement(height, SourceLocation()); + for (int height = minHeight; height <= m_stackHeight; ++height) + targetStackContents[height] = stackElement(height, SourceLocation()); + + // Debug info: + //stream(cout, initialStackContents, targetStackContents); + + AssemblyItems items = CSECodeGenerator(m_expressionClasses, m_storeOperations).generateCode( + initialStackContents, + targetStackContents + ); + if (m_breakingItem) + items.push_back(*m_breakingItem); + return items; +} + +ostream& CommonSubexpressionEliminator::stream( + ostream& _out, + map<int, Id> _initialStack, + map<int, Id> _targetStack +) const +{ + auto streamExpressionClass = [this](ostream& _out, Id _id) + { + auto const& expr = m_expressionClasses.representative(_id); + _out << " " << dec << _id << ": " << *expr.item; + if (expr.sequenceNumber) + _out << "@" << dec << expr.sequenceNumber; + _out << "("; + for (Id arg: expr.arguments) + _out << dec << arg << ","; + _out << ")" << endl; + }; + + _out << "Optimizer analysis:" << endl; + _out << "Final stack height: " << dec << m_stackHeight << endl; + _out << "Equivalence classes: " << endl; + for (Id eqClass = 0; eqClass < m_expressionClasses.size(); ++eqClass) + streamExpressionClass(_out, eqClass); + + _out << "Initial stack: " << endl; + for (auto const& it: _initialStack) + { + _out << " " << dec << it.first << ": "; + streamExpressionClass(_out, it.second); + } + _out << "Target stack: " << endl; + for (auto const& it: _targetStack) + { + _out << " " << dec << it.first << ": "; + streamExpressionClass(_out, it.second); + } + + return _out; +} + +void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _copyItem) +{ + if (_item.type() != Operation) + { + assertThrow(_item.deposit() == 1, InvalidDeposit, ""); + setStackElement(++m_stackHeight, m_expressionClasses.find(_item, {}, _copyItem)); + } + else + { + Instruction instruction = _item.instruction(); + InstructionInfo info = instructionInfo(instruction); + if (SemanticInformation::isDupInstruction(_item)) + setStackElement( + m_stackHeight + 1, + stackElement( + m_stackHeight - int(instruction) + int(Instruction::DUP1), + _item.getLocation() + ) + ); + else if (SemanticInformation::isSwapInstruction(_item)) + swapStackElements( + m_stackHeight, + m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1), + _item.getLocation() + ); + else if (instruction != Instruction::POP) + { + vector<Id> arguments(info.args); + for (int i = 0; i < info.args; ++i) + arguments[i] = stackElement(m_stackHeight - i, _item.getLocation()); + if (_item.instruction() == Instruction::SSTORE) + storeInStorage(arguments[0], arguments[1], _item.getLocation()); + else if (_item.instruction() == Instruction::SLOAD) + setStackElement( + m_stackHeight + _item.deposit(), + loadFromStorage(arguments[0], _item.getLocation()) + ); + else if (_item.instruction() == Instruction::MSTORE) + storeInMemory(arguments[0], arguments[1], _item.getLocation()); + else if (_item.instruction() == Instruction::MLOAD) + setStackElement( + m_stackHeight + _item.deposit(), + loadFromMemory(arguments[0], _item.getLocation()) + ); + else if (_item.instruction() == Instruction::SHA3) + setStackElement( + m_stackHeight + _item.deposit(), + applySha3(arguments.at(0), arguments.at(1), _item.getLocation()) + ); + else + setStackElement( + m_stackHeight + _item.deposit(), + m_expressionClasses.find(_item, arguments, _copyItem) + ); + } + m_stackHeight += _item.deposit(); + } +} + +void CommonSubexpressionEliminator::optimizeBreakingItem() +{ + if (!m_breakingItem || *m_breakingItem != AssemblyItem(Instruction::JUMPI)) + return; + + SourceLocation const& location = m_breakingItem->getLocation(); + AssemblyItem::JumpType jumpType = m_breakingItem->getJumpType(); + + Id condition = stackElement(m_stackHeight - 1, location); + Id zero = m_expressionClasses.find(u256(0)); + if (m_expressionClasses.knownToBeDifferent(condition, zero)) + { + feedItem(AssemblyItem(Instruction::SWAP1, location), true); + feedItem(AssemblyItem(Instruction::POP, location), true); + + AssemblyItem item(Instruction::JUMP, location); + item.setJumpType(jumpType); + m_breakingItem = m_expressionClasses.storeItem(item); + return; + } + Id negatedCondition = m_expressionClasses.find(Instruction::ISZERO, {condition}); + if (m_expressionClasses.knownToBeDifferent(negatedCondition, zero)) + { + AssemblyItem it(Instruction::POP, location); + feedItem(it, true); + feedItem(it, true); + m_breakingItem = nullptr; + } +} + +void CommonSubexpressionEliminator::setStackElement(int _stackHeight, Id _class) +{ + m_stackElements[_stackHeight] = _class; +} + +void CommonSubexpressionEliminator::swapStackElements( + int _stackHeightA, + int _stackHeightB, + SourceLocation const& _location +) +{ + assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements."); + // ensure they are created + stackElement(_stackHeightA, _location); + stackElement(_stackHeightB, _location); + + swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]); +} + +ExpressionClasses::Id CommonSubexpressionEliminator::stackElement( + int _stackHeight, + SourceLocation const& _location +) +{ + if (m_stackElements.count(_stackHeight)) + return m_stackElements.at(_stackHeight); + // Stack element not found (not assigned yet), create new equivalence class. + return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location); +} + +ExpressionClasses::Id CommonSubexpressionEliminator::initialStackElement( + int _stackHeight, + SourceLocation const& _location +) +{ + assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested."); + assertThrow(_stackHeight > -16, StackTooDeepException, ""); + // This is a special assembly item that refers to elements pre-existing on the initial stack. + return m_expressionClasses.find(AssemblyItem(dupInstruction(1 - _stackHeight), _location)); +} + +void CommonSubexpressionEliminator::storeInStorage(Id _slot, Id _value, SourceLocation const& _location) +{ + if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value) + // do not execute the storage if we know that the value is already there + return; + m_sequenceNumber++; + decltype(m_storageContent) storageContents; + // Copy over all values (i.e. retain knowledge about them) where we know that this store + // operation will not destroy the knowledge. Specifically, we copy storage locations we know + // are different from _slot or locations where we know that the stored value is equal to _value. + for (auto const& storageItem: m_storageContent) + if (m_expressionClasses.knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value) + storageContents.insert(storageItem); + m_storageContent = move(storageContents); + + AssemblyItem item(Instruction::SSTORE, _location); + Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber); + m_storeOperations.push_back(StoreOperation(StoreOperation::Storage, _slot, m_sequenceNumber, id)); + m_storageContent[_slot] = _value; + // increment a second time so that we get unique sequence numbers for writes + m_sequenceNumber++; +} + +ExpressionClasses::Id CommonSubexpressionEliminator::loadFromStorage(Id _slot, SourceLocation const& _location) +{ + if (m_storageContent.count(_slot)) + return m_storageContent.at(_slot); + + AssemblyItem item(Instruction::SLOAD, _location); + return m_storageContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber); +} + +void CommonSubexpressionEliminator::storeInMemory(Id _slot, Id _value, SourceLocation const& _location) +{ + if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value) + // do not execute the store if we know that the value is already there + return; + m_sequenceNumber++; + decltype(m_memoryContent) memoryContents; + // copy over values at points where we know that they are different from _slot by at least 32 + for (auto const& memoryItem: m_memoryContent) + if (m_expressionClasses.knownToBeDifferentBy32(memoryItem.first, _slot)) + memoryContents.insert(memoryItem); + m_memoryContent = move(memoryContents); + + AssemblyItem item(Instruction::MSTORE, _location); + Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber); + m_storeOperations.push_back(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id)); + m_memoryContent[_slot] = _value; + // increment a second time so that we get unique sequence numbers for writes + m_sequenceNumber++; +} + +ExpressionClasses::Id CommonSubexpressionEliminator::loadFromMemory(Id _slot, SourceLocation const& _location) +{ + if (m_memoryContent.count(_slot)) + return m_memoryContent.at(_slot); + + AssemblyItem item(Instruction::MLOAD, _location); + return m_memoryContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber); +} + +CommonSubexpressionEliminator::Id CommonSubexpressionEliminator::applySha3( + Id _start, + Id _length, + SourceLocation const& _location +) +{ + AssemblyItem sha3Item(Instruction::SHA3, _location); + // Special logic if length is a short constant, otherwise we cannot tell. + u256 const* l = m_expressionClasses.knownConstant(_length); + // unknown or too large length + if (!l || *l > 128) + return m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber); + + vector<Id> arguments; + for (u256 i = 0; i < *l; i += 32) + { + Id slot = m_expressionClasses.find( + AssemblyItem(Instruction::ADD, _location), + {_start, m_expressionClasses.find(i)} + ); + arguments.push_back(loadFromMemory(slot, _location)); + } + if (m_knownSha3Hashes.count(arguments)) + return m_knownSha3Hashes.at(arguments); + Id v; + // If all arguments are known constants, compute the sha3 here + if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses.knownConstant(_a); })) + { + bytes data; + for (Id a: arguments) + data += toBigEndian(*m_expressionClasses.knownConstant(a)); + data.resize(size_t(*l)); + v = m_expressionClasses.find(AssemblyItem(u256(sha3(data)), _location)); + } + else + v = m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber); + return m_knownSha3Hashes[arguments] = v; +} + +CSECodeGenerator::CSECodeGenerator( + ExpressionClasses& _expressionClasses, + vector<CSECodeGenerator::StoreOperation> const& _storeOperations +): + m_expressionClasses(_expressionClasses) +{ + for (auto const& store: _storeOperations) + m_storeOperations[make_pair(store.target, store.slot)].push_back(store); +} + +AssemblyItems CSECodeGenerator::generateCode( + map<int, Id> const& _initialStack, + map<int, Id> const& _targetStackContents +) +{ + m_stack = _initialStack; + for (auto const& item: m_stack) + if (!m_classPositions.count(item.second)) + m_classPositions[item.second] = item.first; + + // @todo: provide information about the positions of copies of class elements + + // generate the dependency graph starting from final storage and memory writes and target stack contents + for (auto const& p: m_storeOperations) + addDependencies(p.second.back().expression); + for (auto const& targetItem: _targetStackContents) + { + m_finalClasses.insert(targetItem.second); + addDependencies(targetItem.second); + } + + // store all needed sequenced expressions + set<pair<unsigned, Id>> sequencedExpressions; + for (auto const& p: m_neededBy) + for (auto id: {p.first, p.second}) + if (unsigned seqNr = m_expressionClasses.representative(id).sequenceNumber) + sequencedExpressions.insert(make_pair(seqNr, id)); + + // Perform all operations on storage and memory in order, if they are needed. + for (auto const& seqAndId: sequencedExpressions) + if (!m_classPositions.count(seqAndId.second)) + generateClassElement(seqAndId.second, true); + + // generate the target stack elements + for (auto const& targetItem: _targetStackContents) + { + int position = generateClassElement(targetItem.second); + assertThrow(position != c_invalidPosition, OptimizerException, ""); + if (position == targetItem.first) + continue; + SourceLocation const& location = m_expressionClasses.representative(targetItem.second).item->getLocation(); + if (position < targetItem.first) + // it is already at its target, we need another copy + appendDup(position, location); + else + appendOrRemoveSwap(position, location); + appendOrRemoveSwap(targetItem.first, location); + } + + // remove surplus elements + while (removeStackTopIfPossible()) + { + // no-op + } + + // check validity + int finalHeight = 0; + if (!_targetStackContents.empty()) + // have target stack, so its height should be the final height + finalHeight = (--_targetStackContents.end())->first; + else if (!_initialStack.empty()) + // no target stack, only erase the initial stack + finalHeight = _initialStack.begin()->first - 1; + else + // neither initial no target stack, no change in height + finalHeight = 0; + assertThrow(finalHeight == m_stackHeight, OptimizerException, "Incorrect final stack height."); + return m_generatedItems; +} + +void CSECodeGenerator::addDependencies(Id _c) +{ + if (m_neededBy.count(_c)) + return; // we already computed the dependencies for _c + ExpressionClasses::Expression expr = m_expressionClasses.representative(_c); + for (Id argument: expr.arguments) + { + addDependencies(argument); + m_neededBy.insert(make_pair(argument, _c)); + } + if (expr.item->type() == Operation && ( + expr.item->instruction() == Instruction::SLOAD || + expr.item->instruction() == Instruction::MLOAD || + expr.item->instruction() == Instruction::SHA3 + )) + { + // this loads an unknown value from storage or memory and thus, in addition to its + // arguments, depends on all store operations to addresses where we do not know that + // they are different that occur before this load + StoreOperation::Target target = expr.item->instruction() == Instruction::SLOAD ? + StoreOperation::Storage : StoreOperation::Memory; + Id slotToLoadFrom = expr.arguments.at(0); + for (auto const& p: m_storeOperations) + { + if (p.first.first != target) + continue; + Id slot = p.first.second; + StoreOperations const& storeOps = p.second; + if (storeOps.front().sequenceNumber > expr.sequenceNumber) + continue; + bool knownToBeIndependent = false; + switch (expr.item->instruction()) + { + case Instruction::SLOAD: + knownToBeIndependent = m_expressionClasses.knownToBeDifferent(slot, slotToLoadFrom); + break; + case Instruction::MLOAD: + knownToBeIndependent = m_expressionClasses.knownToBeDifferentBy32(slot, slotToLoadFrom); + break; + case Instruction::SHA3: + { + Id length = expr.arguments.at(1); + AssemblyItem offsetInstr(Instruction::SUB, expr.item->getLocation()); + Id offsetToStart = m_expressionClasses.find(offsetInstr, {slot, slotToLoadFrom}); + u256 const* o = m_expressionClasses.knownConstant(offsetToStart); + u256 const* l = m_expressionClasses.knownConstant(length); + if (l && *l == 0) + knownToBeIndependent = true; + else if (o) + { + // We could get problems here if both *o and *l are larger than 2**254 + // but it is probably ok for the optimizer to produce wrong code for such cases + // which cannot be executed anyway because of the non-payable price. + if (u2s(*o) <= -32) + knownToBeIndependent = true; + else if (l && u2s(*o) >= 0 && *o >= *l) + knownToBeIndependent = true; + } + break; + } + default: + break; + } + if (knownToBeIndependent) + continue; + + // note that store and load never have the same sequence number + Id latestStore = storeOps.front().expression; + for (auto it = ++storeOps.begin(); it != storeOps.end(); ++it) + if (it->sequenceNumber < expr.sequenceNumber) + latestStore = it->expression; + addDependencies(latestStore); + m_neededBy.insert(make_pair(latestStore, _c)); + } + } +} + +int CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) +{ + // do some cleanup + removeStackTopIfPossible(); + + if (m_classPositions.count(_c)) + { + assertThrow( + m_classPositions[_c] != c_invalidPosition, + OptimizerException, + "Element already removed but still needed." + ); + return m_classPositions[_c]; + } + ExpressionClasses::Expression const& expr = m_expressionClasses.representative(_c); + assertThrow( + _allowSequenced || expr.sequenceNumber == 0, + OptimizerException, + "Sequence constrained operation requested out of sequence." + ); + vector<Id> const& arguments = expr.arguments; + for (Id arg: boost::adaptors::reverse(arguments)) + generateClassElement(arg); + + SourceLocation const& location = expr.item->getLocation(); + // The arguments are somewhere on the stack now, so it remains to move them at the correct place. + // This is quite difficult as sometimes, the values also have to removed in this process + // (if canBeRemoved() returns true) and the two arguments can be equal. For now, this is + // implemented for every single case for combinations of up to two arguments manually. + if (arguments.size() == 1) + { + if (canBeRemoved(arguments[0], _c)) + appendOrRemoveSwap(classElementPosition(arguments[0]), location); + else + appendDup(classElementPosition(arguments[0]), location); + } + else if (arguments.size() == 2) + { + if (canBeRemoved(arguments[1], _c)) + { + appendOrRemoveSwap(classElementPosition(arguments[1]), location); + if (arguments[0] == arguments[1]) + appendDup(m_stackHeight, location); + else if (canBeRemoved(arguments[0], _c)) + { + appendOrRemoveSwap(m_stackHeight - 1, location); + appendOrRemoveSwap(classElementPosition(arguments[0]), location); + } + else + appendDup(classElementPosition(arguments[0]), location); + } + else + { + if (arguments[0] == arguments[1]) + { + appendDup(classElementPosition(arguments[0]), location); + appendDup(m_stackHeight, location); + } + else if (canBeRemoved(arguments[0], _c)) + { + appendOrRemoveSwap(classElementPosition(arguments[0]), location); + appendDup(classElementPosition(arguments[1]), location); + appendOrRemoveSwap(m_stackHeight - 1, location); + } + else + { + appendDup(classElementPosition(arguments[1]), location); + appendDup(classElementPosition(arguments[0]), location); + } + } + } + else + assertThrow( + arguments.size() <= 2, + OptimizerException, + "Opcodes with more than two arguments not implemented yet." + ); + for (size_t i = 0; i < arguments.size(); ++i) + assertThrow(m_stack[m_stackHeight - i] == arguments[i], OptimizerException, "Expected arguments not present." ); + + while (SemanticInformation::isCommutativeOperation(*expr.item) && + !m_generatedItems.empty() && + m_generatedItems.back() == AssemblyItem(Instruction::SWAP1)) + // this will not append a swap but remove the one that is already there + appendOrRemoveSwap(m_stackHeight - 1, location); + for (auto arg: arguments) + if (canBeRemoved(arg, _c)) + m_classPositions[arg] = c_invalidPosition; + for (size_t i = 0; i < arguments.size(); ++i) + m_stack.erase(m_stackHeight - i); + appendItem(*expr.item); + if (expr.item->type() != Operation || instructionInfo(expr.item->instruction()).ret == 1) + { + m_stack[m_stackHeight] = _c; + return m_classPositions[_c] = m_stackHeight; + } + else + { + assertThrow( + instructionInfo(expr.item->instruction()).ret == 0, + OptimizerException, + "Invalid number of return values." + ); + return m_classPositions[_c] = c_invalidPosition; + } +} + +int CSECodeGenerator::classElementPosition(Id _id) const +{ + assertThrow( + m_classPositions.count(_id) && m_classPositions.at(_id) != c_invalidPosition, + OptimizerException, + "Element requested but is not present." + ); + return m_classPositions.at(_id); +} + +bool CSECodeGenerator::canBeRemoved(Id _element, Id _result) +{ + // Returns false if _element is finally needed or is needed by a class that has not been + // computed yet. Note that m_classPositions also includes classes that were deleted in the meantime. + if (m_finalClasses.count(_element)) + return false; + + auto range = m_neededBy.equal_range(_element); + for (auto it = range.first; it != range.second; ++it) + if (it->second != _result && !m_classPositions.count(it->second)) + return false; + return true; +} + +bool CSECodeGenerator::removeStackTopIfPossible() +{ + if (m_stack.empty()) + return false; + assertThrow(m_stack.count(m_stackHeight) > 0, OptimizerException, ""); + Id top = m_stack[m_stackHeight]; + if (!canBeRemoved(top)) + return false; + m_generatedItems.push_back(AssemblyItem(Instruction::POP)); + m_stack.erase(m_stackHeight); + m_stackHeight--; + return true; +} + +void CSECodeGenerator::appendDup(int _fromPosition, SourceLocation const& _location) +{ + assertThrow(_fromPosition != c_invalidPosition, OptimizerException, ""); + int instructionNum = 1 + m_stackHeight - _fromPosition; + assertThrow(instructionNum <= 16, StackTooDeepException, "Stack too deep."); + assertThrow(1 <= instructionNum, OptimizerException, "Invalid stack access."); + appendItem(AssemblyItem(dupInstruction(instructionNum), _location)); + m_stack[m_stackHeight] = m_stack[_fromPosition]; +} + +void CSECodeGenerator::appendOrRemoveSwap(int _fromPosition, SourceLocation const& _location) +{ + assertThrow(_fromPosition != c_invalidPosition, OptimizerException, ""); + if (_fromPosition == m_stackHeight) + return; + int instructionNum = m_stackHeight - _fromPosition; + assertThrow(instructionNum <= 16, StackTooDeepException, "Stack too deep."); + assertThrow(1 <= instructionNum, OptimizerException, "Invalid stack access."); + appendItem(AssemblyItem(swapInstruction(instructionNum), _location)); + // The value of a class can be present in multiple locations on the stack. We only update the + // "canonical" one that is tracked by m_classPositions + if (m_classPositions[m_stack[m_stackHeight]] == m_stackHeight) + m_classPositions[m_stack[m_stackHeight]] = _fromPosition; + if (m_classPositions[m_stack[_fromPosition]] == _fromPosition) + m_classPositions[m_stack[_fromPosition]] = m_stackHeight; + swap(m_stack[m_stackHeight], m_stack[_fromPosition]); + if (m_generatedItems.size() >= 2 && + SemanticInformation::isSwapInstruction(m_generatedItems.back()) && + *(m_generatedItems.end() - 2) == m_generatedItems.back()) + { + m_generatedItems.pop_back(); + m_generatedItems.pop_back(); + } +} + +void CSECodeGenerator::appendItem(AssemblyItem const& _item) +{ + m_generatedItems.push_back(_item); + m_stackHeight += _item.deposit(); +} diff --git a/CommonSubexpressionEliminator.h b/CommonSubexpressionEliminator.h new file mode 100644 index 00000000..6156bc81 --- /dev/null +++ b/CommonSubexpressionEliminator.h @@ -0,0 +1,233 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file CommonSubexpressionEliminator.h + * @author Christian <c@ethdev.com> + * @date 2015 + * Optimizer step for common subexpression elimination and stack reorganisation. + */ + +#pragma once + +#include <vector> +#include <map> +#include <set> +#include <tuple> +#include <ostream> +#include <libdevcore/CommonIO.h> +#include <libdevcore/Exceptions.h> +#include <libevmasm/ExpressionClasses.h> +#include <libevmasm/SemanticInformation.h> + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector<AssemblyItem>; + +/** + * Optimizer step that performs common subexpression elimination and stack reorganisation, + * i.e. it tries to infer equality among expressions and compute the values of two expressions + * known to be equal only once. + * + * The general workings are that for each assembly item that is fed into the eliminator, an + * equivalence class is derived from the operation and the equivalence class of its arguments. + * DUPi, SWAPi and some arithmetic instructions are used to infer equivalences while these + * classes are determined. + * + * When the list of optimized items is requested, they are generated in a bottom-up fashion, + * adding code for equivalence classes that were not yet computed. + */ +class CommonSubexpressionEliminator +{ +public: + using Id = ExpressionClasses::Id; + struct StoreOperation + { + enum Target { Memory, Storage }; + StoreOperation( + Target _target, + Id _slot, + unsigned _sequenceNumber, + Id _expression + ): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {} + Target target; + Id slot; + unsigned sequenceNumber; + Id expression; + }; + + /// Feeds AssemblyItems into the eliminator and @returns the iterator pointing at the first + /// item that must be fed into a new instance of the eliminator. + template <class _AssemblyItemIterator> + _AssemblyItemIterator feedItems(_AssemblyItemIterator _iterator, _AssemblyItemIterator _end); + + /// @returns the resulting items after optimization. + AssemblyItems getOptimizedItems(); + + /// Streams debugging information to @a _out. + std::ostream& stream( + std::ostream& _out, + std::map<int, Id> _initialStack = std::map<int, Id>(), + std::map<int, Id> _targetStack = std::map<int, Id>() + ) const; + +private: + /// Feeds the item into the system for analysis. + void feedItem(AssemblyItem const& _item, bool _copyItem = false); + + /// Tries to optimize the item that breaks the basic block at the end. + void optimizeBreakingItem(); + + /// Simplifies the given item using + /// Assigns a new equivalence class to the next sequence number of the given stack element. + void setStackElement(int _stackHeight, Id _class); + /// Swaps the given stack elements in their next sequence number. + void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location); + /// Retrieves the current equivalence class fo the given stack element (or generates a new + /// one if it does not exist yet). + Id stackElement(int _stackHeight, SourceLocation const& _location); + /// @returns the equivalence class id of the special initial stack element at the given height + /// (must not be positive). + Id initialStackElement(int _stackHeight, SourceLocation const& _location); + + /// Increments the sequence number, deletes all storage information that might be overwritten + /// and stores the new value at the given slot. + void storeInStorage(Id _slot, Id _value, SourceLocation const& _location); + /// Retrieves the current value at the given slot in storage or creates a new special sload class. + Id loadFromStorage(Id _slot, SourceLocation const& _location); + /// Increments the sequence number, deletes all memory information that might be overwritten + /// and stores the new value at the given slot. + void storeInMemory(Id _slot, Id _value, SourceLocation const& _location); + /// Retrieves the current value at the given slot in memory or creates a new special mload class. + Id loadFromMemory(Id _slot, SourceLocation const& _location); + /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory. + Id applySha3(Id _start, Id _length, SourceLocation const& _location); + + /// Current stack height, can be negative. + int m_stackHeight = 0; + /// Current stack layout, mapping stack height -> equivalence class + std::map<int, Id> m_stackElements; + /// Current sequence number, this is incremented with each modification to storage or memory. + unsigned m_sequenceNumber = 1; + /// Knowledge about storage content. + std::map<Id, Id> m_storageContent; + /// Knowledge about memory content. Keys are memory addresses, note that the values overlap + /// and are not contained here if they are not completely known. + std::map<Id, Id> m_memoryContent; + /// Keeps record of all sha3 hashes that are computed. + std::map<std::vector<Id>, Id> m_knownSha3Hashes; + /// Keeps information about which storage or memory slots were written to at which sequence + /// number with what instruction. + std::vector<StoreOperation> m_storeOperations; + /// Structure containing the classes of equivalent expressions. + ExpressionClasses m_expressionClasses; + + /// The item that breaks the basic block, can be nullptr. + /// It is usually appended to the block but can be optimized in some cases. + AssemblyItem const* m_breakingItem = nullptr; +}; + +/** + * Unit that generates code from current stack layout, target stack layout and information about + * the equivalence classes. + */ +class CSECodeGenerator +{ +public: + using StoreOperation = CommonSubexpressionEliminator::StoreOperation; + using StoreOperations = std::vector<StoreOperation>; + using Id = ExpressionClasses::Id; + + /// Initializes the code generator with the given classes and store operations. + /// The store operations have to be sorted by sequence number in ascending order. + CSECodeGenerator(ExpressionClasses& _expressionClasses, StoreOperations const& _storeOperations); + + /// @returns the assembly items generated from the given requirements + /// @param _initialStack current contents of the stack (up to stack height of zero) + /// @param _targetStackContents final contents of the stack, by stack height relative to initial + /// @note should only be called once on each object. + AssemblyItems generateCode( + std::map<int, Id> const& _initialStack, + std::map<int, Id> const& _targetStackContents + ); + +private: + /// Recursively discovers all dependencies to @a m_requests. + void addDependencies(Id _c); + + /// Produce code that generates the given element if it is not yet present. + /// @returns the stack position of the element or c_invalidPosition if it does not actually + /// generate a value on the stack. + /// @param _allowSequenced indicates that sequence-constrained operations are allowed + int generateClassElement(Id _c, bool _allowSequenced = false); + /// @returns the position of the representative of the given id on the stack. + /// @note throws an exception if it is not on the stack. + int classElementPosition(Id _id) const; + + /// @returns true if @a _element can be removed - in general or, if given, while computing @a _result. + bool canBeRemoved(Id _element, Id _result = Id(-1)); + + /// Appends code to remove the topmost stack element if it can be removed. + bool removeStackTopIfPossible(); + + /// Appends a dup instruction to m_generatedItems to retrieve the element at the given stack position. + void appendDup(int _fromPosition, SourceLocation const& _location); + /// Appends a swap instruction to m_generatedItems to retrieve the element at the given stack position. + /// @note this might also remove the last item if it exactly the same swap instruction. + void appendOrRemoveSwap(int _fromPosition, SourceLocation const& _location); + /// Appends the given assembly item. + void appendItem(AssemblyItem const& _item); + + static const int c_invalidPosition = -0x7fffffff; + + AssemblyItems m_generatedItems; + /// Current height of the stack relative to the start. + int m_stackHeight = 0; + /// If (b, a) is in m_requests then b is needed to compute a. + std::multimap<Id, Id> m_neededBy; + /// Current content of the stack. + std::map<int, Id> m_stack; + /// Current positions of equivalence classes, equal to c_invalidPosition if already deleted. + std::map<Id, int> m_classPositions; + + /// The actual eqivalence class items and how to compute them. + ExpressionClasses& m_expressionClasses; + /// Keeps information about which storage or memory slots were written to by which operations. + /// The operations are sorted ascendingly by sequence number. + std::map<std::pair<StoreOperation::Target, Id>, StoreOperations> m_storeOperations; + /// The set of equivalence classes that should be present on the stack at the end. + std::set<Id> m_finalClasses; +}; + +template <class _AssemblyItemIterator> +_AssemblyItemIterator CommonSubexpressionEliminator::feedItems( + _AssemblyItemIterator _iterator, + _AssemblyItemIterator _end +) +{ + for (; _iterator != _end && !SemanticInformation::breaksCSEAnalysisBlock(*_iterator); ++_iterator) + feedItem(*_iterator); + if (_iterator != _end) + m_breakingItem = &(*_iterator++); + return _iterator; +} + +} +} diff --git a/ControlFlowGraph.cpp b/ControlFlowGraph.cpp new file mode 100644 index 00000000..cc4367e6 --- /dev/null +++ b/ControlFlowGraph.cpp @@ -0,0 +1,260 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file ControlFlowGraph.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + * Control flow analysis for the optimizer. + */ + +#include <libevmasm/ControlFlowGraph.h> +#include <map> +#include <libevmasm/Exceptions.h> +#include <libevmasm/AssemblyItem.h> +#include <libevmasm/SemanticInformation.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + +BlockId::BlockId(u256 const& _id): m_id(_id) +{ + assertThrow( _id < initial().m_id, OptimizerException, "Tag number too large."); +} + +AssemblyItems ControlFlowGraph::optimisedItems() +{ + if (m_items.empty()) + return m_items; + + findLargestTag(); + splitBlocks(); + resolveNextLinks(); + removeUnusedBlocks(); + setPrevLinks(); + + return rebuildCode(); +} + +void ControlFlowGraph::findLargestTag() +{ + m_lastUsedId = 0; + for (auto const& item: m_items) + if (item.type() == Tag || item.type() == PushTag) + { + // Assert that it can be converted. + BlockId(item.data()); + m_lastUsedId = max(unsigned(item.data()), m_lastUsedId); + } +} + +void ControlFlowGraph::splitBlocks() +{ + m_blocks.clear(); + BlockId id = BlockId::initial(); + m_blocks[id].begin = 0; + for (size_t index = 0; index < m_items.size(); ++index) + { + AssemblyItem const& item = m_items.at(index); + if (item.type() == Tag) + { + if (id) + m_blocks[id].end = index; + id = BlockId::invalid(); + } + if (!id) + { + id = item.type() == Tag ? BlockId(item.data()) : generateNewId(); + m_blocks[id].begin = index; + } + if (item.type() == PushTag) + m_blocks[id].pushedTags.push_back(BlockId(item.data())); + if (SemanticInformation::altersControlFlow(item)) + { + m_blocks[id].end = index + 1; + if (item == Instruction::JUMP) + m_blocks[id].endType = BasicBlock::EndType::JUMP; + else if (item == Instruction::JUMPI) + m_blocks[id].endType = BasicBlock::EndType::JUMPI; + else + m_blocks[id].endType = BasicBlock::EndType::STOP; + id = BlockId::invalid(); + } + } + if (id) + { + m_blocks[id].end = m_items.size(); + if (m_blocks[id].endType == BasicBlock::EndType::HANDOVER) + m_blocks[id].endType = BasicBlock::EndType::STOP; + } +} + +void ControlFlowGraph::resolveNextLinks() +{ + map<unsigned, BlockId> blockByBeginPos; + for (auto const& idAndBlock: m_blocks) + if (idAndBlock.second.begin != idAndBlock.second.end) + blockByBeginPos[idAndBlock.second.begin] = idAndBlock.first; + + for (auto& idAndBlock: m_blocks) + { + BasicBlock& block = idAndBlock.second; + switch (block.endType) + { + case BasicBlock::EndType::JUMPI: + case BasicBlock::EndType::HANDOVER: + assertThrow( + blockByBeginPos.count(block.end), + OptimizerException, + "Successor block not found." + ); + block.next = blockByBeginPos.at(block.end); + break; + default: + break; + } + } +} + +void ControlFlowGraph::removeUnusedBlocks() +{ + vector<BlockId> blocksToProcess{BlockId::initial()}; + set<BlockId> neededBlocks{BlockId::initial()}; + while (!blocksToProcess.empty()) + { + BasicBlock const& block = m_blocks.at(blocksToProcess.back()); + blocksToProcess.pop_back(); + for (BlockId tag: block.pushedTags) + if (!neededBlocks.count(tag)) + { + neededBlocks.insert(tag); + blocksToProcess.push_back(tag); + } + if (block.next && !neededBlocks.count(block.next)) + { + neededBlocks.insert(block.next); + blocksToProcess.push_back(block.next); + } + } + for (auto it = m_blocks.begin(); it != m_blocks.end();) + if (neededBlocks.count(it->first)) + ++it; + else + m_blocks.erase(it++); +} + +void ControlFlowGraph::setPrevLinks() +{ + for (auto& idAndBlock: m_blocks) + { + BasicBlock& block = idAndBlock.second; + switch (block.endType) + { + case BasicBlock::EndType::JUMPI: + case BasicBlock::EndType::HANDOVER: + assertThrow( + !m_blocks.at(block.next).prev, + OptimizerException, + "Successor already has predecessor." + ); + m_blocks[block.next].prev = idAndBlock.first; + break; + default: + break; + } + } + // If block ends with jump to not yet linked block, link them removing the jump + for (auto& idAndBlock: m_blocks) + { + BlockId blockId = idAndBlock.first; + BasicBlock& block = idAndBlock.second; + if (block.endType != BasicBlock::EndType::JUMP || block.end - block.begin < 2) + continue; + AssemblyItem const& push = m_items.at(block.end - 2); + if (push.type() != PushTag) + continue; + BlockId nextId(push.data()); + if (m_blocks.at(nextId).prev) + continue; + bool hasLoop = false; + for (BlockId id = nextId; id && !hasLoop; id = m_blocks.at(id).next) + hasLoop = (id == blockId); + if (hasLoop) + continue; + + m_blocks[nextId].prev = blockId; + block.next = nextId; + block.end -= 2; + assertThrow( + !block.pushedTags.empty() && block.pushedTags.back() == nextId, + OptimizerException, + "Last pushed tag not at end of pushed list." + ); + block.pushedTags.pop_back(); + block.endType = BasicBlock::EndType::HANDOVER; + } +} + +AssemblyItems ControlFlowGraph::rebuildCode() +{ + map<BlockId, unsigned> pushes; + for (auto& idAndBlock: m_blocks) + for (BlockId ref: idAndBlock.second.pushedTags) + pushes[ref]++; + + set<BlockId> blocksToAdd; + for (auto it: m_blocks) + blocksToAdd.insert(it.first); + set<BlockId> blocksAdded; + AssemblyItems code; + + for ( + BlockId blockId = BlockId::initial(); + blockId; + blockId = blocksToAdd.empty() ? BlockId::invalid() : *blocksToAdd.begin() + ) + { + bool previousHandedOver = (blockId == BlockId::initial()); + while (m_blocks.at(blockId).prev) + blockId = m_blocks.at(blockId).prev; + for (; blockId; blockId = m_blocks.at(blockId).next) + { + BasicBlock const& block = m_blocks.at(blockId); + blocksToAdd.erase(blockId); + blocksAdded.insert(blockId); + + auto begin = m_items.begin() + block.begin; + auto end = m_items.begin() + block.end; + if (begin == end) + continue; + // If block starts with unused tag, skip it. + if (previousHandedOver && !pushes[blockId] && begin->type() == Tag) + ++begin; + previousHandedOver = (block.endType == BasicBlock::EndType::HANDOVER); + copy(begin, end, back_inserter(code)); + } + } + + return code; +} + +BlockId ControlFlowGraph::generateNewId() +{ + BlockId id = BlockId(++m_lastUsedId); + assertThrow(id < BlockId::initial(), OptimizerException, "Out of block IDs."); + return id; +} diff --git a/ControlFlowGraph.h b/ControlFlowGraph.h new file mode 100644 index 00000000..5d16df32 --- /dev/null +++ b/ControlFlowGraph.h @@ -0,0 +1,108 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file ControlFlowGraph.h + * @author Christian <c@ethdev.com> + * @date 2015 + * Control flow analysis for the optimizer. + */ + +#pragma once + +#include <vector> +#include <libdevcore/Common.h> +#include <libdevcore/Assertions.h> + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector<AssemblyItem>; + +/** + * Identifier for a block, coincides with the tag number of an AssemblyItem but adds a special + * ID for the inital block. + */ +class BlockId +{ +public: + BlockId() { *this = invalid(); } + explicit BlockId(unsigned _id): m_id(_id) {} + explicit BlockId(u256 const& _id); + static BlockId initial() { return BlockId(-2); } + static BlockId invalid() { return BlockId(-1); } + + bool operator==(BlockId const& _other) const { return m_id == _other.m_id; } + bool operator!=(BlockId const& _other) const { return m_id != _other.m_id; } + bool operator<(BlockId const& _other) const { return m_id < _other.m_id; } + explicit operator bool() const { return *this != invalid(); } + +private: + unsigned m_id; +}; + +/** + * Control flow block inside which instruction counter is always incremented by one + * (except for possibly the last instruction). + */ +struct BasicBlock +{ + /// Start index into assembly item list. + unsigned begin = 0; + /// End index (excluded) inte assembly item list. + unsigned end = 0; + /// Tags pushed inside this block, with multiplicity. + std::vector<BlockId> pushedTags; + /// ID of the block that always follows this one (either JUMP or flow into new block), + /// or BlockId::invalid() otherwise + BlockId next = BlockId::invalid(); + /// ID of the block that has to precede this one. + BlockId prev = BlockId::invalid(); + + enum class EndType { JUMP, JUMPI, STOP, HANDOVER }; + EndType endType = EndType::HANDOVER; +}; + +class ControlFlowGraph +{ +public: + /// Initializes the control flow graph. + /// @a _items has to persist across the usage of this class. + ControlFlowGraph(AssemblyItems const& _items): m_items(_items) {} + /// @returns the collection of optimised items, should be called only once. + AssemblyItems optimisedItems(); + +private: + void findLargestTag(); + void splitBlocks(); + void resolveNextLinks(); + void removeUnusedBlocks(); + void setPrevLinks(); + AssemblyItems rebuildCode(); + + BlockId generateNewId(); + + unsigned m_lastUsedId = 0; + AssemblyItems const& m_items; + std::map<BlockId, BasicBlock> m_blocks; +}; + + +} +} diff --git a/Exceptions.h b/Exceptions.h new file mode 100644 index 00000000..7cc190e4 --- /dev/null +++ b/Exceptions.h @@ -0,0 +1,36 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Exceptions.h + * @author Christian <c@ethdev.com> + * @date 2014 + */ + +#pragma once + +#include <libdevcore/Exceptions.h> + +namespace dev +{ +namespace eth +{ + +struct AssemblyException: virtual Exception {}; +struct OptimizerException: virtual AssemblyException {}; +struct StackTooDeepException: virtual OptimizerException {}; + +} +} diff --git a/ExpressionClasses.cpp b/ExpressionClasses.cpp new file mode 100644 index 00000000..1e60a7fe --- /dev/null +++ b/ExpressionClasses.cpp @@ -0,0 +1,438 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file ExpressionClasses.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + * Container for equivalence classes of expressions for use in common subexpression elimination. + */ + +#include <libevmasm/ExpressionClasses.h> +#include <utility> +#include <tuple> +#include <functional> +#include <boost/range/adaptor/reversed.hpp> +#include <boost/noncopyable.hpp> +#include <libevmasm/Assembly.h> +#include <libevmasm/CommonSubexpressionEliminator.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + + +bool ExpressionClasses::Expression::operator<(ExpressionClasses::Expression const& _other) const +{ + auto type = item->type(); + auto otherType = _other.item->type(); + return std::tie(type, item->data(), arguments, sequenceNumber) < + std::tie(otherType, _other.item->data(), _other.arguments, _other.sequenceNumber); +} + +ExpressionClasses::Id ExpressionClasses::find( + AssemblyItem const& _item, + Ids const& _arguments, + bool _copyItem, + unsigned _sequenceNumber +) +{ + Expression exp; + exp.id = Id(-1); + exp.item = &_item; + exp.arguments = _arguments; + exp.sequenceNumber = _sequenceNumber; + + if (SemanticInformation::isCommutativeOperation(_item)) + sort(exp.arguments.begin(), exp.arguments.end()); + + auto it = m_expressions.find(exp); + if (it != m_expressions.end()) + return it->id; + + if (_copyItem) + exp.item = storeItem(_item); + + ExpressionClasses::Id id = tryToSimplify(exp); + if (id < m_representatives.size()) + exp.id = id; + else + { + exp.id = m_representatives.size(); + m_representatives.push_back(exp); + } + m_expressions.insert(exp); + return exp.id; +} + +bool ExpressionClasses::knownToBeDifferent(ExpressionClasses::Id _a, ExpressionClasses::Id _b) +{ + // Try to simplify "_a - _b" and return true iff the value is a non-zero constant. + return knownNonZero(find(Instruction::SUB, {_a, _b})); +} + +bool ExpressionClasses::knownToBeDifferentBy32(ExpressionClasses::Id _a, ExpressionClasses::Id _b) +{ + // Try to simplify "_a - _b" and return true iff the value is at least 32 away from zero. + u256 const* v = knownConstant(find(Instruction::SUB, {_a, _b})); + // forbidden interval is ["-31", 31] + return v && *v + 31 > u256(62); +} + +bool ExpressionClasses::knownZero(Id _c) +{ + return Pattern(u256(0)).matches(representative(_c), *this); +} + +bool ExpressionClasses::knownNonZero(Id _c) +{ + return Pattern(u256(0)).matches(representative(find(Instruction::ISZERO, {_c})), *this); +} + +u256 const* ExpressionClasses::knownConstant(Id _c) +{ + map<unsigned, Expression const*> matchGroups; + Pattern constant(Push); + constant.setMatchGroup(1, matchGroups); + if (!constant.matches(representative(_c), *this)) + return nullptr; + return &constant.d(); +} + +AssemblyItem const* ExpressionClasses::storeItem(AssemblyItem const& _item) +{ + m_spareAssemblyItems.push_back(make_shared<AssemblyItem>(_item)); + return m_spareAssemblyItems.back().get(); +} + +string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) const +{ + Expression const& expr = representative(_id); + stringstream str; + str << dec << expr.id << ":" << *expr.item << "("; + for (Id arg: expr.arguments) + str << fullDAGToString(arg) << ","; + str << ")"; + return str.str(); +} + +class Rules: public boost::noncopyable +{ +public: + Rules(); + void resetMatchGroups() { m_matchGroups.clear(); } + vector<pair<Pattern, function<Pattern()>>> rules() const { return m_rules; } + +private: + using Expression = ExpressionClasses::Expression; + map<unsigned, Expression const*> m_matchGroups; + vector<pair<Pattern, function<Pattern()>>> m_rules; +}; + +Rules::Rules() +{ + // Multiple occurences of one of these inside one rule must match the same equivalence class. + // Constants. + Pattern A(Push); + Pattern B(Push); + Pattern C(Push); + // Anything. + Pattern X; + Pattern Y; + Pattern Z; + A.setMatchGroup(1, m_matchGroups); + B.setMatchGroup(2, m_matchGroups); + C.setMatchGroup(3, m_matchGroups); + X.setMatchGroup(4, m_matchGroups); + Y.setMatchGroup(5, m_matchGroups); + Z.setMatchGroup(6, m_matchGroups); + + m_rules = vector<pair<Pattern, function<Pattern()>>>{ + // arithmetics on constants + {{Instruction::ADD, {A, B}}, [=]{ return A.d() + B.d(); }}, + {{Instruction::MUL, {A, B}}, [=]{ return A.d() * B.d(); }}, + {{Instruction::SUB, {A, B}}, [=]{ return A.d() - B.d(); }}, + {{Instruction::DIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : A.d() / B.d(); }}, + {{Instruction::SDIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(u2s(A.d()) / u2s(B.d())); }}, + {{Instruction::MOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : A.d() % B.d(); }}, + {{Instruction::SMOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(u2s(A.d()) % u2s(B.d())); }}, + {{Instruction::EXP, {A, B}}, [=]{ return u256(boost::multiprecision::powm(bigint(A.d()), bigint(B.d()), bigint(1) << 256)); }}, + {{Instruction::NOT, {A}}, [=]{ return ~A.d(); }}, + {{Instruction::LT, {A, B}}, [=]() { return A.d() < B.d() ? u256(1) : 0; }}, + {{Instruction::GT, {A, B}}, [=]() -> u256 { return A.d() > B.d() ? 1 : 0; }}, + {{Instruction::SLT, {A, B}}, [=]() -> u256 { return u2s(A.d()) < u2s(B.d()) ? 1 : 0; }}, + {{Instruction::SGT, {A, B}}, [=]() -> u256 { return u2s(A.d()) > u2s(B.d()) ? 1 : 0; }}, + {{Instruction::EQ, {A, B}}, [=]() -> u256 { return A.d() == B.d() ? 1 : 0; }}, + {{Instruction::ISZERO, {A}}, [=]() -> u256 { return A.d() == 0 ? 1 : 0; }}, + {{Instruction::AND, {A, B}}, [=]{ return A.d() & B.d(); }}, + {{Instruction::OR, {A, B}}, [=]{ return A.d() | B.d(); }}, + {{Instruction::XOR, {A, B}}, [=]{ return A.d() ^ B.d(); }}, + {{Instruction::BYTE, {A, B}}, [=]{ return A.d() >= 32 ? 0 : (B.d() >> unsigned(8 * (31 - A.d()))) & 0xff; }}, + {{Instruction::ADDMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) + bigint(B.d())) % C.d()); }}, + {{Instruction::MULMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) * bigint(B.d())) % C.d()); }}, + {{Instruction::MULMOD, {A, B, C}}, [=]{ return A.d() * B.d(); }}, + {{Instruction::SIGNEXTEND, {A, B}}, [=]() -> u256 { + if (A.d() >= 31) + return B.d(); + unsigned testBit = unsigned(A.d()) * 8 + 7; + u256 mask = (u256(1) << testBit) - 1; + return u256(boost::multiprecision::bit_test(B.d(), testBit) ? B.d() | ~mask : B.d() & mask); + }}, + + // invariants involving known constants + {{Instruction::ADD, {X, 0}}, [=]{ return X; }}, + {{Instruction::MUL, {X, 1}}, [=]{ return X; }}, + {{Instruction::DIV, {X, 1}}, [=]{ return X; }}, + {{Instruction::SDIV, {X, 1}}, [=]{ return X; }}, + {{Instruction::OR, {X, 0}}, [=]{ return X; }}, + {{Instruction::XOR, {X, 0}}, [=]{ return X; }}, + {{Instruction::AND, {X, ~u256(0)}}, [=]{ return X; }}, + {{Instruction::MUL, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::DIV, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {0, X}}, [=]{ return u256(0); }}, + {{Instruction::AND, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::OR, {X, ~u256(0)}}, [=]{ return ~u256(0); }}, + // operations involving an expression and itself + {{Instruction::AND, {X, X}}, [=]{ return X; }}, + {{Instruction::OR, {X, X}}, [=]{ return X; }}, + {{Instruction::SUB, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::EQ, {X, X}}, [=]{ return u256(1); }}, + {{Instruction::LT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SLT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::GT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SGT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {X, X}}, [=]{ return u256(0); }}, + + {{Instruction::NOT, {{Instruction::NOT, {X}}}}, [=]{ return X; }}, + }; + // Associative operations + for (auto const& opFun: vector<pair<Instruction,function<u256(u256 const&,u256 const&)>>>{ + {Instruction::ADD, plus<u256>()}, + {Instruction::MUL, multiplies<u256>()}, + {Instruction::AND, bit_and<u256>()}, + {Instruction::OR, bit_or<u256>()}, + {Instruction::XOR, bit_xor<u256>()} + }) + { + auto op = opFun.first; + auto fun = opFun.second; + // Moving constants to the outside, order matters here! + // we need actions that return expressions (or patterns?) here, and we need also reversed rules + // (X+A)+B -> X+(A+B) + m_rules += vector<pair<Pattern, function<Pattern()>>>{{ + {op, {{op, {X, A}}, B}}, + [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } + }, { + // X+(Y+A) -> (X+Y)+A + {op, {{op, {X, A}}, Y}}, + [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } + }, { + // For now, we still need explicit commutativity for the inner pattern + {op, {{op, {A, X}}, B}}, + [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } + }, { + {op, {{op, {A, X}}, Y}}, + [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } + }}; + } + // move constants across subtractions + m_rules += vector<pair<Pattern, function<Pattern()>>>{ + { + // X - A -> X + (-A) + {Instruction::SUB, {X, A}}, + [=]() -> Pattern { return {Instruction::ADD, {X, 0 - A.d()}}; } + }, { + // (X + A) - Y -> (X - Y) + A + {Instruction::SUB, {{Instruction::ADD, {X, A}}, Y}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } + }, { + // (A + X) - Y -> (X - Y) + A + {Instruction::SUB, {{Instruction::ADD, {A, X}}, Y}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } + }, { + // X - (Y + A) -> (X - Y) + (-A) + {Instruction::SUB, {X, {Instruction::ADD, {Y, A}}}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } + }, { + // X - (A + Y) -> (X - Y) + (-A) + {Instruction::SUB, {X, {Instruction::ADD, {A, Y}}}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } + } + }; +} + +ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, bool _secondRun) +{ + static Rules rules; + + if (_expr.item->type() != Operation) + return -1; + + for (auto const& rule: rules.rules()) + { + rules.resetMatchGroups(); + if (rule.first.matches(_expr, *this)) + { + // Debug info + //cout << "Simplifying " << *_expr.item << "("; + //for (Id arg: _expr.arguments) + // cout << fullDAGToString(arg) << ", "; + //cout << ")" << endl; + //cout << "with rule " << rule.first.toString() << endl; + //ExpressionTemplate t(rule.second()); + //cout << "to " << rule.second().toString() << endl; + return rebuildExpression(ExpressionTemplate(rule.second(), _expr.item->getLocation())); + } + } + + if (!_secondRun && _expr.arguments.size() == 2 && SemanticInformation::isCommutativeOperation(*_expr.item)) + { + Expression expr = _expr; + swap(expr.arguments[0], expr.arguments[1]); + return tryToSimplify(expr, true); + } + + return -1; +} + +ExpressionClasses::Id ExpressionClasses::rebuildExpression(ExpressionTemplate const& _template) +{ + if (_template.hasId) + return _template.id; + + Ids arguments; + for (ExpressionTemplate const& t: _template.arguments) + arguments.push_back(rebuildExpression(t)); + return find(_template.item, arguments); +} + + +Pattern::Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments): + m_type(Operation), + m_requireDataMatch(true), + m_data(_instruction), + m_arguments(_arguments) +{ +} + +void Pattern::setMatchGroup(unsigned _group, map<unsigned, Expression const*>& _matchGroups) +{ + m_matchGroup = _group; + m_matchGroups = &_matchGroups; +} + +bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const +{ + if (!matchesBaseItem(*_expr.item)) + return false; + if (m_matchGroup) + { + if (!m_matchGroups->count(m_matchGroup)) + (*m_matchGroups)[m_matchGroup] = &_expr; + else if ((*m_matchGroups)[m_matchGroup]->id != _expr.id) + return false; + } + assertThrow(m_arguments.size() == 0 || _expr.arguments.size() == m_arguments.size(), OptimizerException, ""); + for (size_t i = 0; i < m_arguments.size(); ++i) + if (!m_arguments[i].matches(_classes.representative(_expr.arguments[i]), _classes)) + return false; + return true; +} + +AssemblyItem Pattern::toAssemblyItem(SourceLocation const& _location) const +{ + return AssemblyItem(m_type, m_data, _location); +} + +string Pattern::toString() const +{ + stringstream s; + switch (m_type) + { + case Operation: + s << instructionInfo(Instruction(unsigned(m_data))).name; + break; + case Push: + s << "PUSH " << hex << m_data; + break; + case UndefinedItem: + s << "ANY"; + break; + default: + s << "t=" << dec << m_type << " d=" << hex << m_data; + break; + } + if (!m_requireDataMatch) + s << " ~"; + if (m_matchGroup) + s << "[" << dec << m_matchGroup << "]"; + s << "("; + for (Pattern const& p: m_arguments) + s << p.toString() << ", "; + s << ")"; + return s.str(); +} + +bool Pattern::matchesBaseItem(AssemblyItem const& _item) const +{ + if (m_type == UndefinedItem) + return true; + if (m_type != _item.type()) + return false; + if (m_requireDataMatch && m_data != _item.data()) + return false; + return true; +} + +Pattern::Expression const& Pattern::matchGroupValue() const +{ + assertThrow(m_matchGroup > 0, OptimizerException, ""); + assertThrow(!!m_matchGroups, OptimizerException, ""); + assertThrow((*m_matchGroups)[m_matchGroup], OptimizerException, ""); + return *(*m_matchGroups)[m_matchGroup]; +} + + +ExpressionTemplate::ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location) +{ + if (_pattern.matchGroup()) + { + hasId = true; + id = _pattern.id(); + } + else + { + hasId = false; + item = _pattern.toAssemblyItem(_location); + } + for (auto const& arg: _pattern.arguments()) + arguments.push_back(ExpressionTemplate(arg, _location)); +} + +string ExpressionTemplate::toString() const +{ + stringstream s; + if (hasId) + s << id; + else + s << item; + s << "("; + for (auto const& arg: arguments) + s << arg.toString(); + s << ")"; + return s.str(); +} diff --git a/ExpressionClasses.h b/ExpressionClasses.h new file mode 100644 index 00000000..2f720f60 --- /dev/null +++ b/ExpressionClasses.h @@ -0,0 +1,181 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file ExpressionClasses.h + * @author Christian <c@ethdev.com> + * @date 2015 + * Container for equivalence classes of expressions for use in common subexpression elimination. + */ + +#pragma once + +#include <vector> +#include <map> +#include <memory> +#include <libdevcore/Common.h> +#include <libevmasm/AssemblyItem.h> + +namespace dev +{ +namespace eth +{ + +class Pattern; +struct ExpressionTemplate; + +/** + * Collection of classes of equivalent expressions that can also determine the class of an expression. + * Identifiers are contiguously assigned to new classes starting from zero. + */ +class ExpressionClasses +{ +public: + using Id = unsigned; + using Ids = std::vector<Id>; + + struct Expression + { + Id id; + AssemblyItem const* item; + Ids arguments; + unsigned sequenceNumber; ///< Storage modification sequence, only used for SLOAD/SSTORE instructions. + /// Behaves as if this was a tuple of (item->type(), item->data(), arguments, sequenceNumber). + bool operator<(Expression const& _other) const; + }; + + /// Retrieves the id of the expression equivalence class resulting from the given item applied to the + /// given classes, might also create a new one. + /// @param _copyItem if true, copies the assembly item to an internal storage instead of just + /// keeping a pointer. + /// The @a _sequenceNumber indicates the current storage or memory access sequence. + Id find( + AssemblyItem const& _item, + Ids const& _arguments = {}, + bool _copyItem = true, + unsigned _sequenceNumber = 0 + ); + /// @returns the canonical representative of an expression class. + Expression const& representative(Id _id) const { return m_representatives.at(_id); } + /// @returns the number of classes. + Id size() const { return m_representatives.size(); } + + /// @returns true if the values of the given classes are known to be different (on every input). + /// @note that this function might still return false for some different inputs. + bool knownToBeDifferent(Id _a, Id _b); + /// Similar to @a knownToBeDifferent but require that abs(_a - b) >= 32. + bool knownToBeDifferentBy32(Id _a, Id _b); + /// @returns true if the value of the given class is known to be zero. + /// @note that this is not the negation of knownNonZero + bool knownZero(Id _c); + /// @returns true if the value of the given class is known to be nonzero. + /// @note that this is not the negation of knownZero + bool knownNonZero(Id _c); + /// @returns a pointer to the value if the given class is known to be a constant, + /// and a nullptr otherwise. + u256 const* knownConstant(Id _c); + + /// Stores a copy of the given AssemblyItem and returns a pointer to the copy that is valid for + /// the lifetime of the ExpressionClasses object. + AssemblyItem const* storeItem(AssemblyItem const& _item); + + std::string fullDAGToString(Id _id) const; + +private: + /// Tries to simplify the given expression. + /// @returns its class if it possible or Id(-1) otherwise. + /// @param _secondRun is set to true for the second run where arguments of commutative expressions are reversed + Id tryToSimplify(Expression const& _expr, bool _secondRun = false); + + /// Rebuilds an expression from a (matched) pattern. + Id rebuildExpression(ExpressionTemplate const& _template); + + std::vector<std::pair<Pattern, std::function<Pattern()>>> createRules() const; + + /// Expression equivalence class representatives - we only store one item of an equivalence. + std::vector<Expression> m_representatives; + /// All expression ever encountered. + std::set<Expression> m_expressions; + std::vector<std::shared_ptr<AssemblyItem>> m_spareAssemblyItems; +}; + +/** + * Pattern to match against an expression. + * Also stores matched expressions to retrieve them later, for constructing new expressions using + * ExpressionTemplate. + */ +class Pattern +{ +public: + using Expression = ExpressionClasses::Expression; + using Id = ExpressionClasses::Id; + + // Matches a specific constant value. + Pattern(unsigned _value): Pattern(u256(_value)) {} + // Matches a specific constant value. + Pattern(u256 const& _value): m_type(Push), m_requireDataMatch(true), m_data(_value) {} + // Matches a specific assembly item type or anything if not given. + Pattern(AssemblyItemType _type = UndefinedItem): m_type(_type) {} + // Matches a given instruction with given arguments + Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments = {}); + /// Sets this pattern to be part of the match group with the identifier @a _group. + /// Inside one rule, all patterns in the same match group have to match expressions from the + /// same expression equivalence class. + void setMatchGroup(unsigned _group, std::map<unsigned, Expression const*>& _matchGroups); + unsigned matchGroup() const { return m_matchGroup; } + bool matches(Expression const& _expr, ExpressionClasses const& _classes) const; + + AssemblyItem toAssemblyItem(SourceLocation const& _location) const; + std::vector<Pattern> arguments() const { return m_arguments; } + + /// @returns the id of the matched expression if this pattern is part of a match group. + Id id() const { return matchGroupValue().id; } + /// @returns the data of the matched expression if this pattern is part of a match group. + u256 const& d() const { return matchGroupValue().item->data(); } + + std::string toString() const; + +private: + bool matchesBaseItem(AssemblyItem const& _item) const; + Expression const& matchGroupValue() const; + + AssemblyItemType m_type; + bool m_requireDataMatch = false; + u256 m_data = 0; + std::vector<Pattern> m_arguments; + unsigned m_matchGroup = 0; + std::map<unsigned, Expression const*>* m_matchGroups = nullptr; +}; + +/** + * Template for a new expression that can be built from matched patterns. + */ +struct ExpressionTemplate +{ + using Expression = ExpressionClasses::Expression; + using Id = ExpressionClasses::Id; + explicit ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location); + std::string toString() const; + bool hasId = false; + /// Id of the matched expression, if available. + Id id = Id(-1); + // Otherwise, assembly item. + AssemblyItem item = UndefinedItem; + std::vector<ExpressionTemplate> arguments; +}; + +} +} diff --git a/SemanticInformation.cpp b/SemanticInformation.cpp new file mode 100644 index 00000000..83d59efc --- /dev/null +++ b/SemanticInformation.cpp @@ -0,0 +1,124 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file SemanticInformation.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + * Helper to provide semantic information about assembly items. + */ + +#include <libevmasm/SemanticInformation.h> +#include <libevmasm/AssemblyItem.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + +bool SemanticInformation::breaksCSEAnalysisBlock(AssemblyItem const& _item) +{ + switch (_item.type()) + { + default: + case UndefinedItem: + case Tag: + return true; + case Push: + case PushString: + case PushTag: + case PushSub: + case PushSubSize: + case PushProgramSize: + case PushData: + return false; + case Operation: + { + if (isSwapInstruction(_item) || isDupInstruction(_item)) + return false; + if (_item.instruction() == Instruction::GAS || _item.instruction() == Instruction::PC) + return true; // GAS and PC assume a specific order of opcodes + if (_item.instruction() == Instruction::MSIZE) + return true; // msize is modified already by memory access, avoid that for now + InstructionInfo info = instructionInfo(_item.instruction()); + if (_item.instruction() == Instruction::SSTORE) + return false; + if (_item.instruction() == Instruction::MSTORE) + return false; + //@todo: We do not handle the following memory instructions for now: + // calldatacopy, codecopy, extcodecopy, mstore8, + // msize (note that msize also depends on memory read access) + + // the second requirement will be lifted once it is implemented + return info.sideEffects || info.args > 2; + } + } +} + +bool SemanticInformation::isCommutativeOperation(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return false; + switch (_item.instruction()) + { + case Instruction::ADD: + case Instruction::MUL: + case Instruction::EQ: + case Instruction::AND: + case Instruction::OR: + case Instruction::XOR: + return true; + default: + return false; + } +} + +bool SemanticInformation::isDupInstruction(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return false; + return Instruction::DUP1 <= _item.instruction() && _item.instruction() <= Instruction::DUP16; +} + +bool SemanticInformation::isSwapInstruction(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return false; + return Instruction::SWAP1 <= _item.instruction() && _item.instruction() <= Instruction::SWAP16; +} + +bool SemanticInformation::isJumpInstruction(AssemblyItem const& _item) +{ + return _item == AssemblyItem(Instruction::JUMP) || _item == AssemblyItem(Instruction::JUMPI); +} + +bool SemanticInformation::altersControlFlow(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return false; + switch (_item.instruction()) + { + // note that CALL, CALLCODE and CREATE do not really alter the control flow, because we + // continue on the next instruction (unless an exception happens which can always happen) + case Instruction::JUMP: + case Instruction::JUMPI: + case Instruction::RETURN: + case Instruction::SUICIDE: + case Instruction::STOP: + return true; + default: + return false; + } +} diff --git a/SemanticInformation.h b/SemanticInformation.h new file mode 100644 index 00000000..27aa6f1a --- /dev/null +++ b/SemanticInformation.h @@ -0,0 +1,51 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file SemanticInformation.h + * @author Christian <c@ethdev.com> + * @date 2015 + * Helper to provide semantic information about assembly items. + */ + +#pragma once + + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; + +/** + * Helper functions to provide context-independent information about assembly items. + */ +struct SemanticInformation +{ + /// @returns true if the given items starts a new block for common subexpression analysis. + static bool breaksCSEAnalysisBlock(AssemblyItem const& _item); + /// @returns true if the item is a two-argument operation whose value does not depend on the + /// order of its arguments. + static bool isCommutativeOperation(AssemblyItem const& _item); + static bool isDupInstruction(AssemblyItem const& _item); + static bool isSwapInstruction(AssemblyItem const& _item); + static bool isJumpInstruction(AssemblyItem const& _item); + static bool altersControlFlow(AssemblyItem const& _item); +}; + +} +} diff --git a/SourceLocation.h b/SourceLocation.h new file mode 100644 index 00000000..35e3c031 --- /dev/null +++ b/SourceLocation.h @@ -0,0 +1,89 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @author Lefteris Karapetsas <lefteris@ethdev.com> + * @date 2015 + * Represents a location in a source file + */ + +#pragma once + +#include <memory> +#include <string> +#include <ostream> +#include <tuple> + +namespace dev +{ + +/** + * Representation of an interval of source positions. + * The interval includes start and excludes end. + */ +struct SourceLocation +{ + SourceLocation(int _start, int _end, std::shared_ptr<std::string const> _sourceName): + start(_start), end(_end), sourceName(_sourceName) { } + SourceLocation(): start(-1), end(-1) { } + + SourceLocation(SourceLocation const& _other): + start(_other.start), end(_other.end), sourceName(_other.sourceName) {} + SourceLocation& operator=(SourceLocation const& _other) { start = _other.start; end = _other.end; sourceName = _other.sourceName; return *this;} + + bool operator==(SourceLocation const& _other) const { return start == _other.start && end == _other.end;} + bool operator!=(SourceLocation const& _other) const { return !operator==(_other); } + inline bool operator<(SourceLocation const& _other) const; + inline bool contains(SourceLocation const& _other) const; + inline bool intersects(SourceLocation const& _other) const; + + bool isEmpty() const { return start == -1 && end == -1; } + + int start; + int end; + std::shared_ptr<std::string const> sourceName; +}; + +/// Stream output for Location (used e.g. in boost exceptions). +inline std::ostream& operator<<(std::ostream& _out, SourceLocation const& _location) +{ + if (_location.isEmpty()) + return _out << "NO_LOCATION_SPECIFIED"; + return _out << *_location.sourceName << "[" << _location.start << "," << _location.end << ")"; +} + +bool SourceLocation::operator<(SourceLocation const& _other) const +{ + if (!sourceName || !_other.sourceName) + return int(!!sourceName) < int(!!_other.sourceName); + return make_tuple(*sourceName, start, end) < make_tuple(*_other.sourceName, _other.start, _other.end); +} + +bool SourceLocation::contains(SourceLocation const& _other) const +{ + if (isEmpty() || _other.isEmpty() || !sourceName || !_other.sourceName || *sourceName != *_other.sourceName) + return false; + return start <= _other.start && _other.end <= end; +} + +bool SourceLocation::intersects(SourceLocation const& _other) const +{ + if (isEmpty() || _other.isEmpty() || !sourceName || !_other.sourceName || *sourceName != *_other.sourceName) + return false; + return _other.start < end && start < _other.end; +} + +} |