diff options
author | chriseth <c@ethdev.com> | 2016-03-29 18:09:01 +0800 |
---|---|---|
committer | chriseth <c@ethdev.com> | 2016-03-29 18:09:01 +0800 |
commit | dc536004a44ee91b9196c9d51ed317ba86300ead (patch) | |
tree | 0e0f2716f74b3ece09f5bb1ffdb259c0119899f7 | |
parent | 14d6b0b2d6b35013494e585fef6a88b3e3fa2b5e (diff) | |
parent | 79167aa8977af99c93d79de1de51c71082922f23 (diff) | |
download | dexon-solidity-dc536004a44ee91b9196c9d51ed317ba86300ead.tar.gz dexon-solidity-dc536004a44ee91b9196c9d51ed317ba86300ead.tar.zst dexon-solidity-dc536004a44ee91b9196c9d51ed317ba86300ead.zip |
Merge pull request #448 from chriseth/movesol
Move evmasm and lll.
44 files changed, 6424 insertions, 1 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index a02b779e..18566bc7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,10 +26,13 @@ include(EthUtils) include(EthOptions) configure_project(TESTS) +add_subdirectory(libevmasm) add_subdirectory(libsolidity) add_subdirectory(solc) if (NOT EMSCRIPTEN) + add_subdirectory(liblll) add_subdirectory(test) + add_subdirectory(lllc) endif() # TODO installation and packaging rules diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp new file mode 100644 index 00000000..7277865e --- /dev/null +++ b/libevmasm/Assembly.cpp @@ -0,0 +1,545 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Assembly.cpp + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#include "Assembly.h" +#include <fstream> +#include <libdevcore/Log.h> +#include <libevmasm/CommonSubexpressionEliminator.h> +#include <libevmasm/ControlFlowGraph.h> +#include <libevmasm/BlockDeduplicator.h> +#include <libevmasm/ConstantOptimiser.h> +#include <libevmasm/GasMeter.h> +#include <json/json.h> +using namespace std; +using namespace dev; +using namespace dev::eth; + +void Assembly::append(Assembly const& _a) +{ + auto newDeposit = m_deposit + _a.deposit(); + for (AssemblyItem i: _a.m_items) + { + if (i.type() == Tag || i.type() == PushTag) + i.setData(i.data() + m_usedTags); + else if (i.type() == PushSub || i.type() == PushSubSize) + i.setData(i.data() + m_subs.size()); + append(i); + } + m_deposit = newDeposit; + m_usedTags += _a.m_usedTags; + for (auto const& i: _a.m_data) + m_data.insert(i); + for (auto const& i: _a.m_strings) + m_strings.insert(i); + m_subs += _a.m_subs; + for (auto const& lib: _a.m_libraries) + m_libraries.insert(lib); + + assert(!_a.m_baseDeposit); + assert(!_a.m_totalDeposit); +} + +void Assembly::append(Assembly const& _a, int _deposit) +{ + if (_deposit > _a.m_deposit) + BOOST_THROW_EXCEPTION(InvalidDeposit()); + else + { + append(_a); + while (_deposit++ < _a.m_deposit) + append(Instruction::POP); + } +} + +string Assembly::out() const +{ + stringstream ret; + stream(ret); + return ret.str(); +} + +unsigned Assembly::bytesRequired() const +{ + for (unsigned br = 1;; ++br) + { + unsigned ret = 1; + for (auto const& i: m_data) + ret += i.second.size(); + + for (AssemblyItem const& i: m_items) + ret += i.bytesRequired(br); + if (dev::bytesRequired(ret) <= br) + return ret; + } +} + +string Assembly::locationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const +{ + if (_location.isEmpty() || _sourceCodes.empty() || _location.start >= _location.end || _location.start < 0) + return ""; + + auto it = _sourceCodes.find(*_location.sourceName); + if (it == _sourceCodes.end()) + return ""; + + string const& source = it->second; + if (size_t(_location.start) >= source.size()) + return ""; + + string cut = source.substr(_location.start, _location.end - _location.start); + auto newLinePos = cut.find_first_of("\n"); + if (newLinePos != string::npos) + cut = cut.substr(0, newLinePos) + "..."; + + return cut; +} + +ostream& Assembly::streamAsm(ostream& _out, string const& _prefix, StringMap const& _sourceCodes) const +{ + _out << _prefix << ".code:" << endl; + for (AssemblyItem const& i: m_items) + { + _out << _prefix; + switch (i.type()) + { + case Operation: + _out << " " << instructionInfo(i.instruction()).name << "\t" << i.getJumpTypeAsString(); + break; + case Push: + _out << " PUSH " << hex << i.data(); + break; + case PushString: + _out << " PUSH \"" << m_strings.at((h256)i.data()) << "\""; + break; + case PushTag: + if (i.data() == 0) + _out << " PUSH [ErrorTag]"; + else + _out << " PUSH [tag" << dec << i.data() << "]"; + break; + case PushSub: + _out << " PUSH [$" << h256(i.data()).abridgedMiddle() << "]"; + break; + case PushSubSize: + _out << " PUSH #[$" << h256(i.data()).abridgedMiddle() << "]"; + break; + case PushProgramSize: + _out << " PUSHSIZE"; + break; + case PushLibraryAddress: + _out << " PUSHLIB \"" << m_libraries.at(h256(i.data())) << "\""; + break; + case Tag: + _out << "tag" << dec << i.data() << ": " << endl << _prefix << " JUMPDEST"; + break; + case PushData: + _out << " PUSH [" << hex << (unsigned)i.data() << "]"; + break; + default: + BOOST_THROW_EXCEPTION(InvalidOpcode()); + } + _out << "\t\t" << locationFromSources(_sourceCodes, i.location()) << endl; + } + + if (!m_data.empty() || !m_subs.empty()) + { + _out << _prefix << ".data:" << endl; + for (auto const& i: m_data) + if (u256(i.first) >= m_subs.size()) + _out << _prefix << " " << hex << (unsigned)(u256)i.first << ": " << dev::toHex(i.second) << endl; + for (size_t i = 0; i < m_subs.size(); ++i) + { + _out << _prefix << " " << hex << i << ": " << endl; + m_subs[i].stream(_out, _prefix + " ", _sourceCodes); + } + } + return _out; +} + +Json::Value Assembly::createJsonValue(string _name, int _begin, int _end, string _value, string _jumpType) const +{ + Json::Value value; + value["name"] = _name; + value["begin"] = _begin; + value["end"] = _end; + if (!_value.empty()) + value["value"] = _value; + if (!_jumpType.empty()) + value["jumpType"] = _jumpType; + return value; +} + +string toStringInHex(u256 _value) +{ + std::stringstream hexStr; + hexStr << hex << _value; + return hexStr.str(); +} + +Json::Value Assembly::streamAsmJson(ostream& _out, StringMap const& _sourceCodes) const +{ + Json::Value root; + + Json::Value collection(Json::arrayValue); + for (AssemblyItem const& i: m_items) + { + switch (i.type()) + { + case Operation: + collection.append( + createJsonValue(instructionInfo(i.instruction()).name, i.location().start, i.location().end, i.getJumpTypeAsString())); + break; + case Push: + collection.append( + createJsonValue("PUSH", i.location().start, i.location().end, toStringInHex(i.data()), i.getJumpTypeAsString())); + break; + case PushString: + collection.append( + createJsonValue("PUSH tag", i.location().start, i.location().end, m_strings.at((h256)i.data()))); + break; + case PushTag: + if (i.data() == 0) + collection.append( + createJsonValue("PUSH [ErrorTag]", i.location().start, i.location().end, "")); + else + collection.append( + createJsonValue("PUSH [tag]", i.location().start, i.location().end, string(i.data()))); + break; + case PushSub: + collection.append( + createJsonValue("PUSH [$]", i.location().start, i.location().end, dev::toString(h256(i.data())))); + break; + case PushSubSize: + collection.append( + createJsonValue("PUSH #[$]", i.location().start, i.location().end, dev::toString(h256(i.data())))); + break; + case PushProgramSize: + collection.append( + createJsonValue("PUSHSIZE", i.location().start, i.location().end)); + break; + case PushLibraryAddress: + collection.append( + createJsonValue("PUSHLIB", i.location().start, i.location().end, m_libraries.at(h256(i.data()))) + ); + break; + case Tag: + collection.append( + createJsonValue("tag", i.location().start, i.location().end, string(i.data()))); + collection.append( + createJsonValue("JUMPDEST", i.location().start, i.location().end)); + break; + case PushData: + collection.append(createJsonValue("PUSH data", i.location().start, i.location().end, toStringInHex(i.data()))); + break; + default: + BOOST_THROW_EXCEPTION(InvalidOpcode()); + } + } + + root[".code"] = collection; + + if (!m_data.empty() || !m_subs.empty()) + { + Json::Value data; + for (auto const& i: m_data) + if (u256(i.first) >= m_subs.size()) + data[toStringInHex((u256)i.first)] = toHex(i.second); + + for (size_t i = 0; i < m_subs.size(); ++i) + { + std::stringstream hexStr; + hexStr << hex << i; + data[hexStr.str()] = m_subs[i].stream(_out, "", _sourceCodes, true); + } + root[".data"] = data; + _out << root; + } + return root; +} + +Json::Value Assembly::stream(ostream& _out, string const& _prefix, StringMap const& _sourceCodes, bool _inJsonFormat) const +{ + if (_inJsonFormat) + return streamAsmJson(_out, _sourceCodes); + else + { + streamAsm(_out, _prefix, _sourceCodes); + return Json::Value(); + } +} + +AssemblyItem const& Assembly::append(AssemblyItem const& _i) +{ + m_deposit += _i.deposit(); + m_items.push_back(_i); + if (m_items.back().location().isEmpty() && !m_currentSourceLocation.isEmpty()) + m_items.back().setLocation(m_currentSourceLocation); + return back(); +} + +AssemblyItem Assembly::newPushLibraryAddress(string const& _identifier) +{ + h256 h(dev::sha3(_identifier)); + m_libraries[h] = _identifier; + return AssemblyItem(PushLibraryAddress, h); +} + +void Assembly::injectStart(AssemblyItem const& _i) +{ + m_items.insert(m_items.begin(), _i); +} + +struct OptimiserChannel: public LogChannel { static const char* name() { return "OPT"; } static const int verbosity = 12; }; +#define copt dev::LogOutputStream<OptimiserChannel, true>() + +Assembly& Assembly::optimise(bool _enable, bool _isCreation, size_t _runs) +{ + if (!_enable) + return *this; + + unsigned total = 0; + for (unsigned count = 1; count > 0; total += count) + { + copt << toString(*this); + count = 0; + + copt << "Performing optimisation..."; + // This only modifies PushTags, we have to run again to actually remove code. + BlockDeduplicator dedup(m_items); + if (dedup.deduplicate()) + count++; + + { + ControlFlowGraph cfg(m_items); + AssemblyItems optimisedItems; + for (BasicBlock const& block: cfg.optimisedBlocks()) + { + assertThrow(!!block.startState, OptimizerException, ""); + CommonSubexpressionEliminator eliminator(*block.startState); + auto iter = m_items.begin() + block.begin; + auto const end = m_items.begin() + block.end; + while (iter < end) + { + auto orig = iter; + iter = eliminator.feedItems(iter, end); + bool shouldReplace = false; + AssemblyItems optimisedChunk; + try + { + optimisedChunk = eliminator.getOptimizedItems(); + shouldReplace = (optimisedChunk.size() < size_t(iter - orig)); + } + catch (StackTooDeepException const&) + { + // This might happen if the opcode reconstruction is not as efficient + // as the hand-crafted code. + } + catch (ItemNotAvailableException const&) + { + // This might happen if e.g. associativity and commutativity rules + // reorganise the expression tree, but not all leaves are available. + } + + if (shouldReplace) + { + copt << "Old size: " << (iter - orig) << ", new size: " << optimisedChunk.size(); + count++; + optimisedItems += optimisedChunk; + } + else + copy(orig, iter, back_inserter(optimisedItems)); + } + } + + if (optimisedItems.size() < m_items.size()) + { + m_items = move(optimisedItems); + count++; + } + } + } + + total += ConstantOptimisationMethod::optimiseConstants( + _isCreation, + _isCreation ? 1 : _runs, + *this, + m_items + ); + + copt << total << " optimisations done."; + + for (auto& sub: m_subs) + sub.optimise(true, false, _runs); + + return *this; +} + +LinkerObject const& Assembly::assemble() const +{ + if (!m_assembledObject.bytecode.empty()) + return m_assembledObject; + + LinkerObject& ret = m_assembledObject; + + unsigned totalBytes = bytesRequired(); + vector<unsigned> tagPos(m_usedTags); + map<unsigned, unsigned> tagRef; + multimap<h256, unsigned> dataRef; + multimap<size_t, size_t> subRef; + vector<unsigned> sizeRef; ///< Pointers to code locations where the size of the program is inserted + unsigned bytesPerTag = dev::bytesRequired(totalBytes); + byte tagPush = (byte)Instruction::PUSH1 - 1 + bytesPerTag; + + unsigned bytesRequiredIncludingData = bytesRequired(); + for (auto const& sub: m_subs) + bytesRequiredIncludingData += sub.assemble().bytecode.size(); + + unsigned bytesPerDataRef = dev::bytesRequired(bytesRequiredIncludingData); + byte dataRefPush = (byte)Instruction::PUSH1 - 1 + bytesPerDataRef; + ret.bytecode.reserve(bytesRequiredIncludingData); + + for (AssemblyItem const& i: m_items) + { + // store position of the invalid jump destination + if (i.type() != Tag && tagPos[0] == 0) + tagPos[0] = ret.bytecode.size(); + + switch (i.type()) + { + case Operation: + ret.bytecode.push_back((byte)i.data()); + break; + case PushString: + { + ret.bytecode.push_back((byte)Instruction::PUSH32); + unsigned ii = 0; + for (auto j: m_strings.at((h256)i.data())) + if (++ii > 32) + break; + else + ret.bytecode.push_back((byte)j); + while (ii++ < 32) + ret.bytecode.push_back(0); + break; + } + case Push: + { + byte b = max<unsigned>(1, dev::bytesRequired(i.data())); + ret.bytecode.push_back((byte)Instruction::PUSH1 - 1 + b); + ret.bytecode.resize(ret.bytecode.size() + b); + bytesRef byr(&ret.bytecode.back() + 1 - b, b); + toBigEndian(i.data(), byr); + break; + } + case PushTag: + { + ret.bytecode.push_back(tagPush); + tagRef[ret.bytecode.size()] = (unsigned)i.data(); + ret.bytecode.resize(ret.bytecode.size() + bytesPerTag); + break; + } + case PushData: + ret.bytecode.push_back(dataRefPush); + dataRef.insert(make_pair((h256)i.data(), ret.bytecode.size())); + ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); + break; + case PushSub: + ret.bytecode.push_back(dataRefPush); + subRef.insert(make_pair(size_t(i.data()), ret.bytecode.size())); + ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); + break; + case PushSubSize: + { + auto s = m_subs.at(size_t(i.data())).assemble().bytecode.size(); + i.setPushedValue(u256(s)); + byte b = max<unsigned>(1, dev::bytesRequired(s)); + ret.bytecode.push_back((byte)Instruction::PUSH1 - 1 + b); + ret.bytecode.resize(ret.bytecode.size() + b); + bytesRef byr(&ret.bytecode.back() + 1 - b, b); + toBigEndian(s, byr); + break; + } + case PushProgramSize: + { + ret.bytecode.push_back(dataRefPush); + sizeRef.push_back(ret.bytecode.size()); + ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); + break; + } + case PushLibraryAddress: + ret.bytecode.push_back(byte(Instruction::PUSH20)); + ret.linkReferences[ret.bytecode.size()] = m_libraries.at(i.data()); + ret.bytecode.resize(ret.bytecode.size() + 20); + break; + case Tag: + tagPos[(unsigned)i.data()] = ret.bytecode.size(); + assertThrow(i.data() != 0, AssemblyException, ""); + ret.bytecode.push_back((byte)Instruction::JUMPDEST); + break; + default: + BOOST_THROW_EXCEPTION(InvalidOpcode()); + } + } + for (auto const& i: tagRef) + { + bytesRef r(ret.bytecode.data() + i.first, bytesPerTag); + auto tag = i.second; + if (tag >= tagPos.size()) + tag = 0; + if (tag == 0) + assertThrow(tagPos[tag] != 0, AssemblyException, ""); + + toBigEndian(tagPos[tag], r); + } + + if (!dataRef.empty() && !subRef.empty()) + ret.bytecode.push_back(0); + for (size_t i = 0; i < m_subs.size(); ++i) + { + auto references = subRef.equal_range(i); + if (references.first == references.second) + continue; + for (auto ref = references.first; ref != references.second; ++ref) + { + bytesRef r(ret.bytecode.data() + ref->second, bytesPerDataRef); + toBigEndian(ret.bytecode.size(), r); + } + ret.append(m_subs[i].assemble()); + } + for (auto const& dataItem: m_data) + { + auto references = dataRef.equal_range(dataItem.first); + if (references.first == references.second) + continue; + for (auto ref = references.first; ref != references.second; ++ref) + { + bytesRef r(ret.bytecode.data() + ref->second, bytesPerDataRef); + toBigEndian(ret.bytecode.size(), r); + } + ret.bytecode += dataItem.second; + } + for (unsigned pos: sizeRef) + { + bytesRef r(ret.bytecode.data() + pos, bytesPerDataRef); + toBigEndian(ret.bytecode.size(), r); + } + return ret; +} diff --git a/libevmasm/Assembly.h b/libevmasm/Assembly.h new file mode 100644 index 00000000..28328277 --- /dev/null +++ b/libevmasm/Assembly.h @@ -0,0 +1,151 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Assembly.h + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#pragma once + +#include <iostream> +#include <sstream> +#include <libdevcore/Common.h> +#include <libdevcore/Assertions.h> +#include <libdevcore/SHA3.h> +#include <libevmcore/Instruction.h> +#include <libevmasm/SourceLocation.h> +#include <libevmasm/AssemblyItem.h> +#include <libevmasm/LinkerObject.h> +#include "Exceptions.h" +#include <json/json.h> + +namespace Json +{ +class Value; +} +namespace dev +{ +namespace eth +{ + +class Assembly +{ +public: + Assembly() {} + + AssemblyItem newTag() { return AssemblyItem(Tag, m_usedTags++); } + AssemblyItem newPushTag() { return AssemblyItem(PushTag, m_usedTags++); } + AssemblyItem newData(bytes const& _data) { h256 h(sha3(asString(_data))); m_data[h] = _data; return AssemblyItem(PushData, h); } + AssemblyItem newSub(Assembly const& _sub) { m_subs.push_back(_sub); return AssemblyItem(PushSub, m_subs.size() - 1); } + Assembly const& sub(size_t _sub) const { return m_subs.at(_sub); } + Assembly& sub(size_t _sub) { return m_subs.at(_sub); } + AssemblyItem newPushString(std::string const& _data) { h256 h(sha3(_data)); m_strings[h] = _data; return AssemblyItem(PushString, h); } + AssemblyItem newPushSubSize(u256 const& _subId) { return AssemblyItem(PushSubSize, _subId); } + AssemblyItem newPushLibraryAddress(std::string const& _identifier); + + AssemblyItem append() { return append(newTag()); } + void append(Assembly const& _a); + void append(Assembly const& _a, int _deposit); + AssemblyItem const& append(AssemblyItem const& _i); + AssemblyItem const& append(std::string const& _data) { return append(newPushString(_data)); } + AssemblyItem const& append(bytes const& _data) { return append(newData(_data)); } + AssemblyItem appendSubSize(Assembly const& _a) { auto ret = newSub(_a); append(newPushSubSize(ret.data())); return ret; } + /// Pushes the final size of the current assembly itself. Use this when the code is modified + /// after compilation and CODESIZE is not an option. + void appendProgramSize() { append(AssemblyItem(PushProgramSize)); } + void appendLibraryAddress(std::string const& _identifier) { append(newPushLibraryAddress(_identifier)); } + + AssemblyItem appendJump() { auto ret = append(newPushTag()); append(Instruction::JUMP); return ret; } + AssemblyItem appendJumpI() { auto ret = append(newPushTag()); append(Instruction::JUMPI); return ret; } + AssemblyItem appendJump(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); append(Instruction::JUMP); return ret; } + AssemblyItem appendJumpI(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); append(Instruction::JUMPI); return ret; } + AssemblyItem errorTag() { return AssemblyItem(PushTag, 0); } + + template <class T> Assembly& operator<<(T const& _d) { append(_d); return *this; } + AssemblyItems const& items() const { return m_items; } + AssemblyItem const& back() const { return m_items.back(); } + std::string backString() const { return m_items.size() && m_items.back().type() == PushString ? m_strings.at((h256)m_items.back().data()) : std::string(); } + + void onePath() { if (asserts(!m_totalDeposit && !m_baseDeposit)) BOOST_THROW_EXCEPTION(InvalidDeposit()); m_baseDeposit = m_deposit; m_totalDeposit = INT_MAX; } + void otherPath() { donePath(); m_totalDeposit = m_deposit; m_deposit = m_baseDeposit; } + void donePaths() { donePath(); m_totalDeposit = m_baseDeposit = 0; } + void ignored() { m_baseDeposit = m_deposit; } + void endIgnored() { m_deposit = m_baseDeposit; m_baseDeposit = 0; } + + void popTo(int _deposit) { while (m_deposit > _deposit) append(Instruction::POP); } + + void injectStart(AssemblyItem const& _i); + std::string out() const; + int deposit() const { return m_deposit; } + void adjustDeposit(int _adjustment) { m_deposit += _adjustment; if (asserts(m_deposit >= 0)) BOOST_THROW_EXCEPTION(InvalidDeposit()); } + void setDeposit(int _deposit) { m_deposit = _deposit; if (asserts(m_deposit >= 0)) BOOST_THROW_EXCEPTION(InvalidDeposit()); } + + /// Changes the source location used for each appended item. + void setSourceLocation(SourceLocation const& _location) { m_currentSourceLocation = _location; } + + /// Assembles the assembly into bytecode. The assembly should not be modified after this call. + LinkerObject const& assemble() const; + bytes const& data(h256 const& _i) const { return m_data.at(_i); } + + /// Modify (if @a _enable is set) and return the current assembly such that creation and + /// execution gas usage is optimised. @a _isCreation should be true for the top-level assembly. + /// @a _runs specifes an estimate on how often each opcode in this assembly will be executed, + /// i.e. use a small value to optimise for size and a large value to optimise for runtime. + Assembly& optimise(bool _enable, bool _isCreation = true, size_t _runs = 200); + Json::Value stream( + std::ostream& _out, + std::string const& _prefix = "", + const StringMap &_sourceCodes = StringMap(), + bool _inJsonFormat = false + ) const; + +protected: + std::string locationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const; + void donePath() { if (m_totalDeposit != INT_MAX && m_totalDeposit != m_deposit) BOOST_THROW_EXCEPTION(InvalidDeposit()); } + unsigned bytesRequired() const; + +private: + Json::Value streamAsmJson(std::ostream& _out, StringMap const& _sourceCodes) const; + std::ostream& streamAsm(std::ostream& _out, std::string const& _prefix, StringMap const& _sourceCodes) const; + Json::Value createJsonValue(std::string _name, int _begin, int _end, std::string _value = std::string(), std::string _jumpType = std::string()) const; + +protected: + // 0 is reserved for exception + unsigned m_usedTags = 1; + AssemblyItems m_items; + std::map<h256, bytes> m_data; + std::vector<Assembly> m_subs; + std::map<h256, std::string> m_strings; + std::map<h256, std::string> m_libraries; ///< Identifiers of libraries to be linked. + + mutable LinkerObject m_assembledObject; + + int m_deposit = 0; + int m_baseDeposit = 0; + int m_totalDeposit = 0; + + SourceLocation m_currentSourceLocation; +}; + +inline std::ostream& operator<<(std::ostream& _out, Assembly const& _a) +{ + _a.stream(_out); + return _out; +} + +} +} diff --git a/libevmasm/AssemblyItem.cpp b/libevmasm/AssemblyItem.cpp new file mode 100644 index 00000000..d7051064 --- /dev/null +++ b/libevmasm/AssemblyItem.cpp @@ -0,0 +1,134 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Assembly.cpp + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#include "AssemblyItem.h" +#include <fstream> + +using namespace std; +using namespace dev; +using namespace dev::eth; + +unsigned AssemblyItem::bytesRequired(unsigned _addressLength) const +{ + switch (m_type) + { + case Operation: + case Tag: // 1 byte for the JUMPDEST + return 1; + case PushString: + return 33; + case Push: + return 1 + max<unsigned>(1, dev::bytesRequired(m_data)); + case PushSubSize: + case PushProgramSize: + return 4; // worst case: a 16MB program + case PushTag: + case PushData: + case PushSub: + return 1 + _addressLength; + case PushLibraryAddress: + return 21; + default: + break; + } + BOOST_THROW_EXCEPTION(InvalidOpcode()); +} + +int AssemblyItem::deposit() const +{ + switch (m_type) + { + case Operation: + return instructionInfo(instruction()).ret - instructionInfo(instruction()).args; + case Push: + case PushString: + case PushTag: + case PushData: + case PushSub: + case PushSubSize: + case PushProgramSize: + case PushLibraryAddress: + return 1; + case Tag: + return 0; + default:; + } + return 0; +} + +string AssemblyItem::getJumpTypeAsString() const +{ + switch (m_jumpType) + { + case JumpType::IntoFunction: + return "[in]"; + case JumpType::OutOfFunction: + return "[out]"; + case JumpType::Ordinary: + default: + return ""; + } +} + +ostream& dev::eth::operator<<(ostream& _out, AssemblyItem const& _item) +{ + switch (_item.type()) + { + case Operation: + _out << " " << instructionInfo(_item.instruction()).name; + if (_item.instruction() == eth::Instruction::JUMP || _item.instruction() == eth::Instruction::JUMPI) + _out << "\t" << _item.getJumpTypeAsString(); + break; + case Push: + _out << " PUSH " << hex << _item.data(); + break; + case PushString: + _out << " PushString" << hex << (unsigned)_item.data(); + break; + case PushTag: + _out << " PushTag " << _item.data(); + break; + case Tag: + _out << " Tag " << _item.data(); + break; + case PushData: + _out << " PushData " << hex << (unsigned)_item.data(); + break; + case PushSub: + _out << " PushSub " << hex << h256(_item.data()).abridgedMiddle(); + break; + case PushSubSize: + _out << " PushSubSize " << hex << h256(_item.data()).abridgedMiddle(); + break; + case PushProgramSize: + _out << " PushProgramSize"; + break; + case PushLibraryAddress: + _out << " PushLibraryAddress " << hex << h256(_item.data()).abridgedMiddle(); + break; + case UndefinedItem: + _out << " ???"; + break; + default: + BOOST_THROW_EXCEPTION(InvalidOpcode()); + } + return _out; +} diff --git a/libevmasm/AssemblyItem.h b/libevmasm/AssemblyItem.h new file mode 100644 index 00000000..795b5a8a --- /dev/null +++ b/libevmasm/AssemblyItem.h @@ -0,0 +1,123 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Assembly.h + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#pragma once + +#include <iostream> +#include <sstream> +#include <libdevcore/Common.h> +#include <libdevcore/Assertions.h> +#include <libevmcore/Instruction.h> +#include <libevmasm/SourceLocation.h> +#include "Exceptions.h" + +namespace dev +{ +namespace eth +{ + +enum AssemblyItemType { + UndefinedItem, + Operation, + Push, + PushString, + PushTag, + PushSub, + PushSubSize, + PushProgramSize, + Tag, + PushData, + PushLibraryAddress ///< Push a currently unknown address of another (library) contract. +}; + +class Assembly; + +class AssemblyItem +{ +public: + enum class JumpType { Ordinary, IntoFunction, OutOfFunction }; + + AssemblyItem(u256 _push, SourceLocation const& _location = SourceLocation()): + AssemblyItem(Push, _push, _location) { } + AssemblyItem(Instruction _i, SourceLocation const& _location = SourceLocation()): + AssemblyItem(Operation, byte(_i), _location) { } + AssemblyItem(AssemblyItemType _type, u256 _data = 0, SourceLocation const& _location = SourceLocation()): + m_type(_type), + m_data(_data), + m_location(_location) + { + } + + AssemblyItem tag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(Tag, m_data); } + AssemblyItem pushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(PushTag, m_data); } + + AssemblyItemType type() const { return m_type; } + u256 const& data() const { return m_data; } + void setType(AssemblyItemType const _type) { m_type = _type; } + void setData(u256 const& _data) { m_data = _data; } + + /// @returns the instruction of this item (only valid if type() == Operation) + Instruction instruction() const { return Instruction(byte(m_data)); } + + /// @returns true if the type and data of the items are equal. + bool operator==(AssemblyItem const& _other) const { return m_type == _other.m_type && m_data == _other.m_data; } + bool operator!=(AssemblyItem const& _other) const { return !operator==(_other); } + /// Less-than operator compatible with operator==. + bool operator<(AssemblyItem const& _other) const { return std::tie(m_type, m_data) < std::tie(_other.m_type, _other.m_data); } + + /// @returns an upper bound for the number of bytes required by this item, assuming that + /// the value of a jump tag takes @a _addressLength bytes. + unsigned bytesRequired(unsigned _addressLength) const; + int deposit() const; + + bool match(AssemblyItem const& _i) const { return _i.m_type == UndefinedItem || (m_type == _i.m_type && (m_type != Operation || m_data == _i.m_data)); } + void setLocation(SourceLocation const& _location) { m_location = _location; } + SourceLocation const& location() const { return m_location; } + + void setJumpType(JumpType _jumpType) { m_jumpType = _jumpType; } + JumpType getJumpType() const { return m_jumpType; } + std::string getJumpTypeAsString() const; + + void setPushedValue(u256 const& _value) const { m_pushedValue = std::make_shared<u256>(_value); } + u256 const* pushedValue() const { return m_pushedValue.get(); } + +private: + AssemblyItemType m_type; + u256 m_data; + SourceLocation m_location; + JumpType m_jumpType = JumpType::Ordinary; + /// Pushed value for operations with data to be determined during assembly stage, + /// e.g. PushSubSize, PushTag, PushSub, etc. + mutable std::shared_ptr<u256> m_pushedValue; +}; + +using AssemblyItems = std::vector<AssemblyItem>; + +std::ostream& operator<<(std::ostream& _out, AssemblyItem const& _item); +inline std::ostream& operator<<(std::ostream& _out, AssemblyItems const& _items) +{ + for (AssemblyItem const& item: _items) + _out << item; + return _out; +} + +} +} diff --git a/libevmasm/BlockDeduplicator.cpp b/libevmasm/BlockDeduplicator.cpp new file mode 100644 index 00000000..d930ea22 --- /dev/null +++ b/libevmasm/BlockDeduplicator.cpp @@ -0,0 +1,126 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file BlockDeduplicator.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + * Unifies basic blocks that share content. + */ + +#include <libevmasm/BlockDeduplicator.h> +#include <functional> +#include <libevmasm/AssemblyItem.h> +#include <libevmasm/SemanticInformation.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + + +bool BlockDeduplicator::deduplicate() +{ + // Compares indices based on the suffix that starts there, ignoring tags and stopping at + // opcodes that stop the control flow. + + // Virtual tag that signifies "the current block" and which is used to optimise loops. + // We abort if this virtual tag actually exists. + AssemblyItem pushSelf(PushTag, u256(-4)); + if ( + std::count(m_items.cbegin(), m_items.cend(), pushSelf.tag()) || + std::count(m_items.cbegin(), m_items.cend(), pushSelf.pushTag()) + ) + return false; + + function<bool(size_t, size_t)> comparator = [&](size_t _i, size_t _j) + { + if (_i == _j) + return false; + + // To compare recursive loops, we have to already unify PushTag opcodes of the + // block's own tag. + AssemblyItem pushFirstTag(pushSelf); + AssemblyItem pushSecondTag(pushSelf); + + if (_i < m_items.size() && m_items.at(_i).type() == Tag) + pushFirstTag = m_items.at(_i).pushTag(); + if (_j < m_items.size() && m_items.at(_j).type() == Tag) + pushSecondTag = m_items.at(_j).pushTag(); + + BlockIterator first(m_items.begin() + _i, m_items.end(), &pushFirstTag, &pushSelf); + BlockIterator second(m_items.begin() + _j, m_items.end(), &pushSecondTag, &pushSelf); + BlockIterator end(m_items.end(), m_items.end()); + + if (first != end && (*first).type() == Tag) + ++first; + if (second != end && (*second).type() == Tag) + ++second; + + return std::lexicographical_compare(first, end, second, end); + }; + + size_t iterations = 0; + for (; ; ++iterations) + { + //@todo this should probably be optimized. + set<size_t, function<bool(size_t, size_t)>> blocksSeen(comparator); + map<u256, u256> tagReplacement; + for (size_t i = 0; i < m_items.size(); ++i) + { + if (m_items.at(i).type() != Tag) + continue; + auto it = blocksSeen.find(i); + if (it == blocksSeen.end()) + blocksSeen.insert(i); + else + tagReplacement[m_items.at(i).data()] = m_items.at(*it).data(); + } + + bool changed = false; + for (AssemblyItem& item: m_items) + if (item.type() == PushTag && tagReplacement.count(item.data())) + { + changed = true; + item.setData(tagReplacement.at(item.data())); + } + if (!changed) + break; + } + return iterations > 0; +} + +BlockDeduplicator::BlockIterator& BlockDeduplicator::BlockIterator::operator++() +{ + if (it == end) + return *this; + if (SemanticInformation::altersControlFlow(*it) && *it != AssemblyItem(eth::Instruction::JUMPI)) + it = end; + else + { + ++it; + while (it != end && it->type() == Tag) + ++it; + } + return *this; +} + +AssemblyItem const& BlockDeduplicator::BlockIterator::operator*() const +{ + if (replaceItem && replaceWith && *it == *replaceItem) + return *replaceWith; + else + return *it; +} diff --git a/libevmasm/BlockDeduplicator.h b/libevmasm/BlockDeduplicator.h new file mode 100644 index 00000000..c48835fd --- /dev/null +++ b/libevmasm/BlockDeduplicator.h @@ -0,0 +1,77 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file BlockDeduplicator.h + * @author Christian <c@ethdev.com> + * @date 2015 + * Unifies basic blocks that share content. + */ + +#pragma once + +#include <cstddef> +#include <vector> +#include <functional> + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector<AssemblyItem>; + +/** + * Optimizer class to be used to unify blocks that share content. + * Modifies the passed vector in place. + */ +class BlockDeduplicator +{ +public: + BlockDeduplicator(AssemblyItems& _items): m_items(_items) {} + /// @returns true if something was changed + bool deduplicate(); + +private: + /// Iterator that skips tags and skips to the end if (all branches of) the control + /// flow does not continue to the next instruction. + /// If the arguments are supplied to the constructor, replaces items on the fly. + struct BlockIterator: std::iterator<std::forward_iterator_tag, AssemblyItem const> + { + public: + BlockIterator( + AssemblyItems::const_iterator _it, + AssemblyItems::const_iterator _end, + AssemblyItem const* _replaceItem = nullptr, + AssemblyItem const* _replaceWith = nullptr + ): + it(_it), end(_end), replaceItem(_replaceItem), replaceWith(_replaceWith) {} + BlockIterator& operator++(); + bool operator==(BlockIterator const& _other) const { return it == _other.it; } + bool operator!=(BlockIterator const& _other) const { return it != _other.it; } + AssemblyItem const& operator*() const; + AssemblyItems::const_iterator it; + AssemblyItems::const_iterator end; + AssemblyItem const* replaceItem; + AssemblyItem const* replaceWith; + }; + + AssemblyItems& m_items; +}; + +} +} diff --git a/libevmasm/CMakeLists.txt b/libevmasm/CMakeLists.txt new file mode 100644 index 00000000..424644ca --- /dev/null +++ b/libevmasm/CMakeLists.txt @@ -0,0 +1,15 @@ +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATICLIB") + +aux_source_directory(. SRC_LIST) + +set(EXECUTABLE evmasm) + +file(GLOB HEADERS "*.h") + +include_directories(BEFORE ..) +add_library(${EXECUTABLE} ${SRC_LIST} ${HEADERS}) +eth_use(${EXECUTABLE} REQUIRED Eth::evmcore) + +install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib ) +install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} ) + diff --git a/libevmasm/CommonSubexpressionEliminator.cpp b/libevmasm/CommonSubexpressionEliminator.cpp new file mode 100644 index 00000000..0797dd29 --- /dev/null +++ b/libevmasm/CommonSubexpressionEliminator.cpp @@ -0,0 +1,506 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file CommonSubexpressionEliminator.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + * Optimizer step for common subexpression elimination and stack reorganisation. + */ + +#include <functional> +#include <boost/range/adaptor/reversed.hpp> +#include <libdevcore/SHA3.h> +#include <libevmasm/CommonSubexpressionEliminator.h> +#include <libevmasm/AssemblyItem.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + +vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems() +{ + optimizeBreakingItem(); + + KnownState nextInitialState = m_state; + if (m_breakingItem) + nextInitialState.feedItem(*m_breakingItem); + KnownState nextState = nextInitialState; + + ScopeGuard reset([&]() + { + m_breakingItem = nullptr; + m_storeOperations.clear(); + m_initialState = move(nextInitialState); + m_state = move(nextState); + }); + + map<int, Id> initialStackContents; + map<int, Id> targetStackContents; + int minHeight = m_state.stackHeight() + 1; + if (!m_state.stackElements().empty()) + minHeight = min(minHeight, m_state.stackElements().begin()->first); + for (int height = minHeight; height <= m_initialState.stackHeight(); ++height) + initialStackContents[height] = m_initialState.stackElement(height, SourceLocation()); + for (int height = minHeight; height <= m_state.stackHeight(); ++height) + targetStackContents[height] = m_state.stackElement(height, SourceLocation()); + + AssemblyItems items = CSECodeGenerator(m_state.expressionClasses(), m_storeOperations).generateCode( + m_initialState.sequenceNumber(), + m_initialState.stackHeight(), + initialStackContents, + targetStackContents + ); + if (m_breakingItem) + items.push_back(*m_breakingItem); + + return items; +} + +void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _copyItem) +{ + StoreOperation op = m_state.feedItem(_item, _copyItem); + if (op.isValid()) + m_storeOperations.push_back(op); +} + +void CommonSubexpressionEliminator::optimizeBreakingItem() +{ + if (!m_breakingItem) + return; + + ExpressionClasses& classes = m_state.expressionClasses(); + SourceLocation const& itemLocation = m_breakingItem->location(); + if (*m_breakingItem == AssemblyItem(Instruction::JUMPI)) + { + AssemblyItem::JumpType jumpType = m_breakingItem->getJumpType(); + + Id condition = m_state.stackElement(m_state.stackHeight() - 1, itemLocation); + if (classes.knownNonZero(condition)) + { + feedItem(AssemblyItem(Instruction::SWAP1, itemLocation), true); + feedItem(AssemblyItem(Instruction::POP, itemLocation), true); + + AssemblyItem item(Instruction::JUMP, itemLocation); + item.setJumpType(jumpType); + m_breakingItem = classes.storeItem(item); + } + else if (classes.knownZero(condition)) + { + AssemblyItem it(Instruction::POP, itemLocation); + feedItem(it, true); + feedItem(it, true); + m_breakingItem = nullptr; + } + } + else if (*m_breakingItem == AssemblyItem(Instruction::RETURN)) + { + Id size = m_state.stackElement(m_state.stackHeight() - 1, itemLocation); + if (classes.knownZero(size)) + { + feedItem(AssemblyItem(Instruction::POP, itemLocation), true); + feedItem(AssemblyItem(Instruction::POP, itemLocation), true); + AssemblyItem item(Instruction::STOP, itemLocation); + m_breakingItem = classes.storeItem(item); + } + } +} + +CSECodeGenerator::CSECodeGenerator( + ExpressionClasses& _expressionClasses, + vector<CSECodeGenerator::StoreOperation> const& _storeOperations +): + m_expressionClasses(_expressionClasses) +{ + for (auto const& store: _storeOperations) + m_storeOperations[make_pair(store.target, store.slot)].push_back(store); +} + +AssemblyItems CSECodeGenerator::generateCode( + unsigned _initialSequenceNumber, + int _initialStackHeight, + map<int, Id> const& _initialStack, + map<int, Id> const& _targetStackContents +) +{ + m_stackHeight = _initialStackHeight; + m_stack = _initialStack; + m_targetStack = _targetStackContents; + for (auto const& item: m_stack) + m_classPositions[item.second].insert(item.first); + + // generate the dependency graph starting from final storage and memory writes and target stack contents + for (auto const& p: m_storeOperations) + addDependencies(p.second.back().expression); + for (auto const& targetItem: m_targetStack) + { + m_finalClasses.insert(targetItem.second); + addDependencies(targetItem.second); + } + + // store all needed sequenced expressions + set<pair<unsigned, Id>> sequencedExpressions; + for (auto const& p: m_neededBy) + for (auto id: {p.first, p.second}) + if (unsigned seqNr = m_expressionClasses.representative(id).sequenceNumber) + { + if (seqNr < _initialSequenceNumber) + // Invalid sequenced operation. + // @todo quick fix for now. Proper fix needs to choose representative with higher + // sequence number during dependency analyis. + BOOST_THROW_EXCEPTION(StackTooDeepException()); + sequencedExpressions.insert(make_pair(seqNr, id)); + } + + // Perform all operations on storage and memory in order, if they are needed. + for (auto const& seqAndId: sequencedExpressions) + if (!m_classPositions.count(seqAndId.second)) + generateClassElement(seqAndId.second, true); + + // generate the target stack elements + for (auto const& targetItem: m_targetStack) + { + if (m_stack.count(targetItem.first) && m_stack.at(targetItem.first) == targetItem.second) + continue; // already there + generateClassElement(targetItem.second); + assertThrow(!m_classPositions[targetItem.second].empty(), OptimizerException, ""); + if (m_classPositions[targetItem.second].count(targetItem.first)) + continue; + SourceLocation sourceLocation; + if (m_expressionClasses.representative(targetItem.second).item) + sourceLocation = m_expressionClasses.representative(targetItem.second).item->location(); + int position = classElementPosition(targetItem.second); + if (position < targetItem.first) + // it is already at its target, we need another copy + appendDup(position, sourceLocation); + else + appendOrRemoveSwap(position, sourceLocation); + appendOrRemoveSwap(targetItem.first, sourceLocation); + } + + // remove surplus elements + while (removeStackTopIfPossible()) + { + // no-op + } + + // check validity + int finalHeight = 0; + if (!m_targetStack.empty()) + // have target stack, so its height should be the final height + finalHeight = (--m_targetStack.end())->first; + else if (!_initialStack.empty()) + // no target stack, only erase the initial stack + finalHeight = _initialStack.begin()->first - 1; + else + // neither initial no target stack, no change in height + finalHeight = _initialStackHeight; + assertThrow(finalHeight == m_stackHeight, OptimizerException, "Incorrect final stack height."); + + return m_generatedItems; +} + +void CSECodeGenerator::addDependencies(Id _c) +{ + if (m_classPositions.count(_c)) + return; // it is already on the stack + if (m_neededBy.count(_c)) + return; // we already computed the dependencies for _c + ExpressionClasses::Expression expr = m_expressionClasses.representative(_c); + if (expr.item->type() == UndefinedItem) + BOOST_THROW_EXCEPTION( + // If this exception happens, we need to find a different way to generate the + // compound expression. + ItemNotAvailableException() << errinfo_comment("Undefined item requested but not available.") + ); + for (Id argument: expr.arguments) + { + addDependencies(argument); + m_neededBy.insert(make_pair(argument, _c)); + } + if (expr.item && expr.item->type() == Operation && ( + expr.item->instruction() == Instruction::SLOAD || + expr.item->instruction() == Instruction::MLOAD || + expr.item->instruction() == Instruction::SHA3 + )) + { + // this loads an unknown value from storage or memory and thus, in addition to its + // arguments, depends on all store operations to addresses where we do not know that + // they are different that occur before this load + StoreOperation::Target target = expr.item->instruction() == Instruction::SLOAD ? + StoreOperation::Storage : StoreOperation::Memory; + Id slotToLoadFrom = expr.arguments.at(0); + for (auto const& p: m_storeOperations) + { + if (p.first.first != target) + continue; + Id slot = p.first.second; + StoreOperations const& storeOps = p.second; + if (storeOps.front().sequenceNumber > expr.sequenceNumber) + continue; + bool knownToBeIndependent = false; + switch (expr.item->instruction()) + { + case Instruction::SLOAD: + knownToBeIndependent = m_expressionClasses.knownToBeDifferent(slot, slotToLoadFrom); + break; + case Instruction::MLOAD: + knownToBeIndependent = m_expressionClasses.knownToBeDifferentBy32(slot, slotToLoadFrom); + break; + case Instruction::SHA3: + { + Id length = expr.arguments.at(1); + AssemblyItem offsetInstr(Instruction::SUB, expr.item->location()); + Id offsetToStart = m_expressionClasses.find(offsetInstr, {slot, slotToLoadFrom}); + u256 const* o = m_expressionClasses.knownConstant(offsetToStart); + u256 const* l = m_expressionClasses.knownConstant(length); + if (l && *l == 0) + knownToBeIndependent = true; + else if (o) + { + // We could get problems here if both *o and *l are larger than 2**254 + // but it is probably ok for the optimizer to produce wrong code for such cases + // which cannot be executed anyway because of the non-payable price. + if (u2s(*o) <= -32) + knownToBeIndependent = true; + else if (l && u2s(*o) >= 0 && *o >= *l) + knownToBeIndependent = true; + } + break; + } + default: + break; + } + if (knownToBeIndependent) + continue; + + // note that store and load never have the same sequence number + Id latestStore = storeOps.front().expression; + for (auto it = ++storeOps.begin(); it != storeOps.end(); ++it) + if (it->sequenceNumber < expr.sequenceNumber) + latestStore = it->expression; + addDependencies(latestStore); + m_neededBy.insert(make_pair(latestStore, _c)); + } + } +} + +void CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) +{ + for (auto it: m_classPositions) + for (auto p: it.second) + if (p > m_stackHeight) + assertThrow(false, OptimizerException, ""); + // do some cleanup + removeStackTopIfPossible(); + + if (m_classPositions.count(_c)) + { + assertThrow( + !m_classPositions[_c].empty(), + OptimizerException, + "Element already removed but still needed." + ); + return; + } + ExpressionClasses::Expression const& expr = m_expressionClasses.representative(_c); + assertThrow( + _allowSequenced || expr.sequenceNumber == 0, + OptimizerException, + "Sequence constrained operation requested out of sequence." + ); + assertThrow(expr.item, OptimizerException, "Non-generated expression without item."); + assertThrow( + expr.item->type() != UndefinedItem, + OptimizerException, + "Undefined item requested but not available." + ); + vector<Id> const& arguments = expr.arguments; + for (Id arg: boost::adaptors::reverse(arguments)) + generateClassElement(arg); + + SourceLocation const& itemLocation = expr.item->location(); + // The arguments are somewhere on the stack now, so it remains to move them at the correct place. + // This is quite difficult as sometimes, the values also have to removed in this process + // (if canBeRemoved() returns true) and the two arguments can be equal. For now, this is + // implemented for every single case for combinations of up to two arguments manually. + if (arguments.size() == 1) + { + if (canBeRemoved(arguments[0], _c)) + appendOrRemoveSwap(classElementPosition(arguments[0]), itemLocation); + else + appendDup(classElementPosition(arguments[0]), itemLocation); + } + else if (arguments.size() == 2) + { + if (canBeRemoved(arguments[1], _c)) + { + appendOrRemoveSwap(classElementPosition(arguments[1]), itemLocation); + if (arguments[0] == arguments[1]) + appendDup(m_stackHeight, itemLocation); + else if (canBeRemoved(arguments[0], _c)) + { + appendOrRemoveSwap(m_stackHeight - 1, itemLocation); + appendOrRemoveSwap(classElementPosition(arguments[0]), itemLocation); + } + else + appendDup(classElementPosition(arguments[0]), itemLocation); + } + else + { + if (arguments[0] == arguments[1]) + { + appendDup(classElementPosition(arguments[0]), itemLocation); + appendDup(m_stackHeight, itemLocation); + } + else if (canBeRemoved(arguments[0], _c)) + { + appendOrRemoveSwap(classElementPosition(arguments[0]), itemLocation); + appendDup(classElementPosition(arguments[1]), itemLocation); + appendOrRemoveSwap(m_stackHeight - 1, itemLocation); + } + else + { + appendDup(classElementPosition(arguments[1]), itemLocation); + appendDup(classElementPosition(arguments[0]), itemLocation); + } + } + } + else + assertThrow( + arguments.size() <= 2, + OptimizerException, + "Opcodes with more than two arguments not implemented yet." + ); + for (size_t i = 0; i < arguments.size(); ++i) + assertThrow(m_stack[m_stackHeight - i] == arguments[i], OptimizerException, "Expected arguments not present." ); + + while (SemanticInformation::isCommutativeOperation(*expr.item) && + !m_generatedItems.empty() && + m_generatedItems.back() == AssemblyItem(Instruction::SWAP1)) + // this will not append a swap but remove the one that is already there + appendOrRemoveSwap(m_stackHeight - 1, itemLocation); + for (size_t i = 0; i < arguments.size(); ++i) + { + m_classPositions[m_stack[m_stackHeight - i]].erase(m_stackHeight - i); + m_stack.erase(m_stackHeight - i); + } + appendItem(*expr.item); + if (expr.item->type() != Operation || instructionInfo(expr.item->instruction()).ret == 1) + { + m_stack[m_stackHeight] = _c; + m_classPositions[_c].insert(m_stackHeight); + } + else + { + assertThrow( + instructionInfo(expr.item->instruction()).ret == 0, + OptimizerException, + "Invalid number of return values." + ); + m_classPositions[_c]; // ensure it is created to mark the expression as generated + } +} + +int CSECodeGenerator::classElementPosition(Id _id) const +{ + assertThrow( + m_classPositions.count(_id) && !m_classPositions.at(_id).empty(), + OptimizerException, + "Element requested but is not present." + ); + return *max_element(m_classPositions.at(_id).begin(), m_classPositions.at(_id).end()); +} + +bool CSECodeGenerator::canBeRemoved(Id _element, Id _result, int _fromPosition) +{ + // Default for _fromPosition is the canonical position of the element. + if (_fromPosition == c_invalidPosition) + _fromPosition = classElementPosition(_element); + + bool haveCopy = m_classPositions.at(_element).size() > 1; + if (m_finalClasses.count(_element)) + // It is part of the target stack. It can be removed if it is a copy that is not in the target position. + return haveCopy && (!m_targetStack.count(_fromPosition) || m_targetStack[_fromPosition] != _element); + else if (!haveCopy) + { + // Can be removed unless it is needed by a class that has not been computed yet. + // Note that m_classPositions also includes classes that were deleted in the meantime. + auto range = m_neededBy.equal_range(_element); + for (auto it = range.first; it != range.second; ++it) + if (it->second != _result && !m_classPositions.count(it->second)) + return false; + } + return true; +} + +bool CSECodeGenerator::removeStackTopIfPossible() +{ + if (m_stack.empty()) + return false; + assertThrow(m_stack.count(m_stackHeight) > 0, OptimizerException, ""); + Id top = m_stack[m_stackHeight]; + if (!canBeRemoved(top, Id(-1), m_stackHeight)) + return false; + m_classPositions[m_stack[m_stackHeight]].erase(m_stackHeight); + m_stack.erase(m_stackHeight); + appendItem(AssemblyItem(Instruction::POP)); + return true; +} + +void CSECodeGenerator::appendDup(int _fromPosition, SourceLocation const& _location) +{ + assertThrow(_fromPosition != c_invalidPosition, OptimizerException, ""); + int instructionNum = 1 + m_stackHeight - _fromPosition; + assertThrow(instructionNum <= 16, StackTooDeepException, "Stack too deep, try removing local variables."); + assertThrow(1 <= instructionNum, OptimizerException, "Invalid stack access."); + appendItem(AssemblyItem(dupInstruction(instructionNum), _location)); + m_stack[m_stackHeight] = m_stack[_fromPosition]; + m_classPositions[m_stack[m_stackHeight]].insert(m_stackHeight); +} + +void CSECodeGenerator::appendOrRemoveSwap(int _fromPosition, SourceLocation const& _location) +{ + assertThrow(_fromPosition != c_invalidPosition, OptimizerException, ""); + if (_fromPosition == m_stackHeight) + return; + int instructionNum = m_stackHeight - _fromPosition; + assertThrow(instructionNum <= 16, StackTooDeepException, "Stack too deep, try removing local variables."); + assertThrow(1 <= instructionNum, OptimizerException, "Invalid stack access."); + appendItem(AssemblyItem(swapInstruction(instructionNum), _location)); + + if (m_stack[m_stackHeight] != m_stack[_fromPosition]) + { + m_classPositions[m_stack[m_stackHeight]].erase(m_stackHeight); + m_classPositions[m_stack[m_stackHeight]].insert(_fromPosition); + m_classPositions[m_stack[_fromPosition]].erase(_fromPosition); + m_classPositions[m_stack[_fromPosition]].insert(m_stackHeight); + swap(m_stack[m_stackHeight], m_stack[_fromPosition]); + } + if (m_generatedItems.size() >= 2 && + SemanticInformation::isSwapInstruction(m_generatedItems.back()) && + *(m_generatedItems.end() - 2) == m_generatedItems.back()) + { + m_generatedItems.pop_back(); + m_generatedItems.pop_back(); + } +} + +void CSECodeGenerator::appendItem(AssemblyItem const& _item) +{ + m_generatedItems.push_back(_item); + m_stackHeight += _item.deposit(); +} diff --git a/libevmasm/CommonSubexpressionEliminator.h b/libevmasm/CommonSubexpressionEliminator.h new file mode 100644 index 00000000..f6c43c57 --- /dev/null +++ b/libevmasm/CommonSubexpressionEliminator.h @@ -0,0 +1,183 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file CommonSubexpressionEliminator.h + * @author Christian <c@ethdev.com> + * @date 2015 + * Optimizer step for common subexpression elimination and stack reorganisation. + */ + +#pragma once + +#include <vector> +#include <map> +#include <set> +#include <tuple> +#include <ostream> +#include <libdevcore/CommonIO.h> +#include <libdevcore/Exceptions.h> +#include <libevmasm/ExpressionClasses.h> +#include <libevmasm/SemanticInformation.h> +#include <libevmasm/KnownState.h> + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector<AssemblyItem>; + +/** + * Optimizer step that performs common subexpression elimination and stack reorganisation, + * i.e. it tries to infer equality among expressions and compute the values of two expressions + * known to be equal only once. + * + * The general workings are that for each assembly item that is fed into the eliminator, an + * equivalence class is derived from the operation and the equivalence class of its arguments. + * DUPi, SWAPi and some arithmetic instructions are used to infer equivalences while these + * classes are determined. + * + * When the list of optimized items is requested, they are generated in a bottom-up fashion, + * adding code for equivalence classes that were not yet computed. + */ +class CommonSubexpressionEliminator +{ +public: + using Id = ExpressionClasses::Id; + using StoreOperation = KnownState::StoreOperation; + + CommonSubexpressionEliminator(KnownState const& _state): m_initialState(_state), m_state(_state) {} + + /// Feeds AssemblyItems into the eliminator and @returns the iterator pointing at the first + /// item that must be fed into a new instance of the eliminator. + template <class _AssemblyItemIterator> + _AssemblyItemIterator feedItems(_AssemblyItemIterator _iterator, _AssemblyItemIterator _end); + + /// @returns the resulting items after optimization. + AssemblyItems getOptimizedItems(); + +private: + /// Feeds the item into the system for analysis. + void feedItem(AssemblyItem const& _item, bool _copyItem = false); + + /// Tries to optimize the item that breaks the basic block at the end. + void optimizeBreakingItem(); + + KnownState m_initialState; + KnownState m_state; + /// Keeps information about which storage or memory slots were written to at which sequence + /// number with what instruction. + std::vector<StoreOperation> m_storeOperations; + + /// The item that breaks the basic block, can be nullptr. + /// It is usually appended to the block but can be optimized in some cases. + AssemblyItem const* m_breakingItem = nullptr; +}; + +/** + * Unit that generates code from current stack layout, target stack layout and information about + * the equivalence classes. + */ +class CSECodeGenerator +{ +public: + using StoreOperation = CommonSubexpressionEliminator::StoreOperation; + using StoreOperations = std::vector<StoreOperation>; + using Id = ExpressionClasses::Id; + + /// Initializes the code generator with the given classes and store operations. + /// The store operations have to be sorted by sequence number in ascending order. + CSECodeGenerator(ExpressionClasses& _expressionClasses, StoreOperations const& _storeOperations); + + /// @returns the assembly items generated from the given requirements + /// @param _initialSequenceNumber starting sequence number, do not generate sequenced operations + /// before this number. + /// @param _initialStack current contents of the stack (up to stack height of zero) + /// @param _targetStackContents final contents of the stack, by stack height relative to initial + /// @note should only be called once on each object. + AssemblyItems generateCode( + unsigned _initialSequenceNumber, + int _initialStackHeight, + std::map<int, Id> const& _initialStack, + std::map<int, Id> const& _targetStackContents + ); + +private: + /// Recursively discovers all dependencies to @a m_requests. + void addDependencies(Id _c); + + /// Produce code that generates the given element if it is not yet present. + /// @param _allowSequenced indicates that sequence-constrained operations are allowed + void generateClassElement(Id _c, bool _allowSequenced = false); + /// @returns the position of the representative of the given id on the stack. + /// @note throws an exception if it is not on the stack. + int classElementPosition(Id _id) const; + + /// @returns true if the copy of @a _element can be removed from stack position _fromPosition + /// - in general or, if given, while computing @a _result. + bool canBeRemoved(Id _element, Id _result = Id(-1), int _fromPosition = c_invalidPosition); + + /// Appends code to remove the topmost stack element if it can be removed. + bool removeStackTopIfPossible(); + + /// Appends a dup instruction to m_generatedItems to retrieve the element at the given stack position. + void appendDup(int _fromPosition, SourceLocation const& _location); + /// Appends a swap instruction to m_generatedItems to retrieve the element at the given stack position. + /// @note this might also remove the last item if it exactly the same swap instruction. + void appendOrRemoveSwap(int _fromPosition, SourceLocation const& _location); + /// Appends the given assembly item. + void appendItem(AssemblyItem const& _item); + + static const int c_invalidPosition = -0x7fffffff; + + AssemblyItems m_generatedItems; + /// Current height of the stack relative to the start. + int m_stackHeight; + /// If (b, a) is in m_requests then b is needed to compute a. + std::multimap<Id, Id> m_neededBy; + /// Current content of the stack. + std::map<int, Id> m_stack; + /// Current positions of equivalence classes, equal to the empty set if already deleted. + std::map<Id, std::set<int>> m_classPositions; + + /// The actual eqivalence class items and how to compute them. + ExpressionClasses& m_expressionClasses; + /// Keeps information about which storage or memory slots were written to by which operations. + /// The operations are sorted ascendingly by sequence number. + std::map<std::pair<StoreOperation::Target, Id>, StoreOperations> m_storeOperations; + /// The set of equivalence classes that should be present on the stack at the end. + std::set<Id> m_finalClasses; + std::map<int, Id> m_targetStack; +}; + +template <class _AssemblyItemIterator> +_AssemblyItemIterator CommonSubexpressionEliminator::feedItems( + _AssemblyItemIterator _iterator, + _AssemblyItemIterator _end +) +{ + assertThrow(!m_breakingItem, OptimizerException, "Invalid use of CommonSubexpressionEliminator."); + for (; _iterator != _end && !SemanticInformation::breaksCSEAnalysisBlock(*_iterator); ++_iterator) + feedItem(*_iterator); + if (_iterator != _end) + m_breakingItem = &(*_iterator++); + return _iterator; +} + +} +} diff --git a/libevmasm/ConstantOptimiser.cpp b/libevmasm/ConstantOptimiser.cpp new file mode 100644 index 00000000..766371b0 --- /dev/null +++ b/libevmasm/ConstantOptimiser.cpp @@ -0,0 +1,225 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file ConstantOptimiser.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + */ + +#include <libevmasm/ConstantOptimiser.h> +#include <libevmasm/Assembly.h> +#include <libevmasm/GasMeter.h> +using namespace std; +using namespace dev; +using namespace dev::eth; + +unsigned ConstantOptimisationMethod::optimiseConstants( + bool _isCreation, + size_t _runs, + Assembly& _assembly, + AssemblyItems& _items +) +{ + unsigned optimisations = 0; + map<AssemblyItem, size_t> pushes; + for (AssemblyItem const& item: _items) + if (item.type() == Push) + pushes[item]++; + for (auto it: pushes) + { + AssemblyItem const& item = it.first; + if (item.data() < 0x100) + continue; + Params params; + params.multiplicity = it.second; + params.isCreation = _isCreation; + params.runs = _runs; + LiteralMethod lit(params, item.data()); + bigint literalGas = lit.gasNeeded(); + CodeCopyMethod copy(params, item.data()); + bigint copyGas = copy.gasNeeded(); + ComputeMethod compute(params, item.data()); + bigint computeGas = compute.gasNeeded(); + if (copyGas < literalGas && copyGas < computeGas) + { + copy.execute(_assembly, _items); + optimisations++; + } + else if (computeGas < literalGas && computeGas < copyGas) + { + compute.execute(_assembly, _items); + optimisations++; + } + } + return optimisations; +} + +bigint ConstantOptimisationMethod::simpleRunGas(AssemblyItems const& _items) +{ + EVMSchedule schedule; // TODO: make relevant to context. + bigint gas = 0; + for (AssemblyItem const& item: _items) + if (item.type() == Push) + gas += GasMeter::runGas(Instruction::PUSH1, schedule); + else if (item.type() == Operation) + gas += GasMeter::runGas(item.instruction(), schedule); + return gas; +} + +bigint ConstantOptimisationMethod::dataGas(bytes const& _data) const +{ + if (m_params.isCreation) + { + bigint gas; + for (auto b: _data) + gas += b ? m_schedule.txDataNonZeroGas : m_schedule.txDataZeroGas; + return gas; + } + else + return m_schedule.createDataGas * dataSize(); +} + +size_t ConstantOptimisationMethod::bytesRequired(AssemblyItems const& _items) +{ + size_t size = 0; + for (AssemblyItem const& item: _items) + size += item.bytesRequired(3); // assume 3 byte addresses + return size; +} + +void ConstantOptimisationMethod::replaceConstants( + AssemblyItems& _items, + AssemblyItems const& _replacement +) const +{ + assertThrow(_items.size() > 0, OptimizerException, ""); + for (size_t i = 0; i < _items.size(); ++i) + { + if (_items.at(i) != AssemblyItem(m_value)) + continue; + _items[i] = _replacement[0]; + _items.insert(_items.begin() + i + 1, _replacement.begin() + 1, _replacement.end()); + i += _replacement.size() - 1; + } +} + +bigint LiteralMethod::gasNeeded() +{ + return combineGas( + simpleRunGas({Instruction::PUSH1}), + // PUSHX plus data + (m_params.isCreation ? m_schedule.txDataNonZeroGas : m_schedule.createDataGas) + dataGas(), + 0 + ); +} + +CodeCopyMethod::CodeCopyMethod(Params const& _params, u256 const& _value): + ConstantOptimisationMethod(_params, _value) +{ + m_copyRoutine = AssemblyItems{ + u256(0), + Instruction::DUP1, + Instruction::MLOAD, // back up memory + u256(32), + AssemblyItem(PushData, u256(1) << 16), // has to be replaced + Instruction::DUP4, + Instruction::CODECOPY, + Instruction::DUP2, + Instruction::MLOAD, + Instruction::SWAP2, + Instruction::MSTORE + }; +} + +bigint CodeCopyMethod::gasNeeded() +{ + return combineGas( + // Run gas: we ignore memory increase costs + simpleRunGas(m_copyRoutine) + m_schedule.copyGas, + // Data gas for copy routines: Some bytes are zero, but we ignore them. + bytesRequired(m_copyRoutine) * (m_params.isCreation ? m_schedule.txDataNonZeroGas : m_schedule.createDataGas), + // Data gas for data itself + dataGas(toBigEndian(m_value)) + ); +} + +void CodeCopyMethod::execute(Assembly& _assembly, AssemblyItems& _items) +{ + bytes data = toBigEndian(m_value); + m_copyRoutine[4] = _assembly.newData(data); + replaceConstants(_items, m_copyRoutine); +} + +AssemblyItems ComputeMethod::findRepresentation(u256 const& _value) +{ + if (_value < 0x10000) + // Very small value, not worth computing + return AssemblyItems{_value}; + else if (dev::bytesRequired(~_value) < dev::bytesRequired(_value)) + // Negated is shorter to represent + return findRepresentation(~_value) + AssemblyItems{Instruction::NOT}; + else + { + // Decompose value into a * 2**k + b where abs(b) << 2**k + // Is not always better, try literal and decomposition method. + AssemblyItems routine{u256(_value)}; + bigint bestGas = gasNeeded(routine); + for (unsigned bits = 255; bits > 8; --bits) + { + unsigned gapDetector = unsigned(_value >> (bits - 8)) & 0x1ff; + if (gapDetector != 0xff && gapDetector != 0x100) + continue; + + u256 powerOfTwo = u256(1) << bits; + u256 upperPart = _value >> bits; + bigint lowerPart = _value & (powerOfTwo - 1); + if (abs(powerOfTwo - lowerPart) < lowerPart) + lowerPart = lowerPart - powerOfTwo; // make it negative + if (abs(lowerPart) >= (powerOfTwo >> 8)) + continue; + + AssemblyItems newRoutine; + if (lowerPart != 0) + newRoutine += findRepresentation(u256(abs(lowerPart))); + newRoutine += AssemblyItems{u256(bits), u256(2), Instruction::EXP}; + if (upperPart != 1 && upperPart != 0) + newRoutine += findRepresentation(upperPart) + AssemblyItems{Instruction::MUL}; + if (lowerPart > 0) + newRoutine += AssemblyItems{Instruction::ADD}; + else if (lowerPart < 0) + newRoutine.push_back(Instruction::SUB); + + bigint newGas = gasNeeded(newRoutine); + if (newGas < bestGas) + { + bestGas = move(newGas); + routine = move(newRoutine); + } + } + return routine; + } +} + +bigint ComputeMethod::gasNeeded(AssemblyItems const& _routine) +{ + size_t numExps = count(_routine.begin(), _routine.end(), Instruction::EXP); + return combineGas( + simpleRunGas(_routine) + numExps * (m_schedule.expGas + m_schedule.expByteGas), + // Data gas for routine: Some bytes are zero, but we ignore them. + bytesRequired(_routine) * (m_params.isCreation ? m_schedule.txDataNonZeroGas : m_schedule.createDataGas), + 0 + ); +} diff --git a/libevmasm/ConstantOptimiser.h b/libevmasm/ConstantOptimiser.h new file mode 100644 index 00000000..64cb66bb --- /dev/null +++ b/libevmasm/ConstantOptimiser.h @@ -0,0 +1,155 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file ConstantOptimiser.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + */ + +#pragma once + +#include <vector> +#include <libdevcore/CommonData.h> +#include <libdevcore/CommonIO.h> +#include <libethcore/ChainOperationParams.h> + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector<AssemblyItem>; +class Assembly; + +// TODO: FIXME: HOMESTEAD: XXX: @chfast populate m_schedule from an ExtVMFace instance via ExtVMFace::evmSchedule. + +/** + * Abstract base class for one way to change how constants are represented in the code. + */ +class ConstantOptimisationMethod +{ +public: + /// Tries to optimised how constants are represented in the source code and modifies + /// @a _assembly and its @a _items. + /// @returns zero if no optimisations could be performed. + static unsigned optimiseConstants( + bool _isCreation, + size_t _runs, + Assembly& _assembly, + AssemblyItems& _items + ); + + struct Params + { + bool isCreation; ///< Whether this is called during contract creation or runtime. + size_t runs; ///< Estimated number of calls per opcode oven the lifetime of the contract. + size_t multiplicity; ///< Number of times the constant appears in the code. + }; + + explicit ConstantOptimisationMethod(Params const& _params, u256 const& _value): + m_params(_params), m_value(_value) {} + virtual bigint gasNeeded() = 0; + virtual void execute(Assembly& _assembly, AssemblyItems& _items) = 0; + +protected: + size_t dataSize() const { return std::max<size_t>(1, dev::bytesRequired(m_value)); } + + /// @returns the run gas for the given items ignoring special gas costs + static bigint simpleRunGas(AssemblyItems const& _items); + /// @returns the gas needed to store the given data literally + bigint dataGas(bytes const& _data) const; + /// @returns the gas needed to store the value literally + bigint dataGas() const { return dataGas(toCompactBigEndian(m_value, 1)); } + static size_t bytesRequired(AssemblyItems const& _items); + /// @returns the combined estimated gas usage taking @a m_params into account. + bigint combineGas( + bigint const& _runGas, + bigint const& _repeatedDataGas, + bigint const& _uniqueDataGas + ) + { + // _runGas is not multiplied by _multiplicity because the runs are "per opcode" + return m_params.runs * _runGas + m_params.multiplicity * _repeatedDataGas + _uniqueDataGas; + } + + /// Replaces the constant by the code given in @a _replacement. + void replaceConstants(AssemblyItems& _items, AssemblyItems const& _replacement) const; + + Params m_params; + u256 const& m_value; + EVMSchedule m_schedule; +}; + +/** + * Optimisation method that pushes the constant to the stack literally. This is the default method, + * i.e. executing it does not alter the Assembly. + */ +class LiteralMethod: public ConstantOptimisationMethod +{ +public: + explicit LiteralMethod(Params const& _params, u256 const& _value): + ConstantOptimisationMethod(_params, _value) {} + virtual bigint gasNeeded() override; + virtual void execute(Assembly&, AssemblyItems&) override {} + + EVMSchedule m_schedule; +}; + +/** + * Method that stores the data in the .data section of the code and copies it to the stack. + */ +class CodeCopyMethod: public ConstantOptimisationMethod +{ +public: + explicit CodeCopyMethod(Params const& _params, u256 const& _value); + virtual bigint gasNeeded() override; + virtual void execute(Assembly& _assembly, AssemblyItems& _items) override; + +protected: + AssemblyItems m_copyRoutine; + EVMSchedule m_schedule; +}; + +/** + * Method that tries to compute the constant. + */ +class ComputeMethod: public ConstantOptimisationMethod +{ +public: + explicit ComputeMethod(Params const& _params, u256 const& _value): + ConstantOptimisationMethod(_params, _value) + { + m_routine = findRepresentation(m_value); + } + + virtual bigint gasNeeded() override { return gasNeeded(m_routine); } + virtual void execute(Assembly&, AssemblyItems& _items) override + { + replaceConstants(_items, m_routine); + } + +protected: + /// Tries to recursively find a way to compute @a _value. + AssemblyItems findRepresentation(u256 const& _value); + bigint gasNeeded(AssemblyItems const& _routine); + + AssemblyItems m_routine; + EVMSchedule m_schedule; +}; + +} +} diff --git a/libevmasm/ControlFlowGraph.cpp b/libevmasm/ControlFlowGraph.cpp new file mode 100644 index 00000000..fc2144c7 --- /dev/null +++ b/libevmasm/ControlFlowGraph.cpp @@ -0,0 +1,369 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file ControlFlowGraph.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + * Control flow analysis for the optimizer. + */ + +#include <libevmasm/ControlFlowGraph.h> +#include <map> +#include <memory> +#include <algorithm> +#include <libevmasm/Exceptions.h> +#include <libevmasm/AssemblyItem.h> +#include <libevmasm/SemanticInformation.h> +#include <libevmasm/KnownState.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + +BlockId::BlockId(u256 const& _id): + m_id(unsigned(_id)) +{ + assertThrow( _id < initial().m_id, OptimizerException, "Tag number too large."); +} + +BasicBlocks ControlFlowGraph::optimisedBlocks() +{ + if (m_items.empty()) + return BasicBlocks(); + + findLargestTag(); + splitBlocks(); + resolveNextLinks(); + removeUnusedBlocks(); + setPrevLinks(); + gatherKnowledge(); + + return rebuildCode(); +} + +void ControlFlowGraph::findLargestTag() +{ + m_lastUsedId = 0; + for (auto const& item: m_items) + if (item.type() == Tag || item.type() == PushTag) + { + // Assert that it can be converted. + BlockId(item.data()); + m_lastUsedId = max(unsigned(item.data()), m_lastUsedId); + } +} + +void ControlFlowGraph::splitBlocks() +{ + m_blocks.clear(); + BlockId id = BlockId::initial(); + m_blocks[id].begin = 0; + for (size_t index = 0; index < m_items.size(); ++index) + { + AssemblyItem const& item = m_items.at(index); + if (item.type() == Tag) + { + if (id) + m_blocks[id].end = index; + id = BlockId::invalid(); + } + if (!id) + { + id = item.type() == Tag ? BlockId(item.data()) : generateNewId(); + m_blocks[id].begin = index; + } + if (item.type() == PushTag) + m_blocks[id].pushedTags.push_back(BlockId(item.data())); + if (SemanticInformation::altersControlFlow(item)) + { + m_blocks[id].end = index + 1; + if (item == Instruction::JUMP) + m_blocks[id].endType = BasicBlock::EndType::JUMP; + else if (item == Instruction::JUMPI) + m_blocks[id].endType = BasicBlock::EndType::JUMPI; + else + m_blocks[id].endType = BasicBlock::EndType::STOP; + id = BlockId::invalid(); + } + } + if (id) + { + m_blocks[id].end = m_items.size(); + if (m_blocks[id].endType == BasicBlock::EndType::HANDOVER) + m_blocks[id].endType = BasicBlock::EndType::STOP; + } +} + +void ControlFlowGraph::resolveNextLinks() +{ + map<unsigned, BlockId> blockByBeginPos; + for (auto const& idAndBlock: m_blocks) + if (idAndBlock.second.begin != idAndBlock.second.end) + blockByBeginPos[idAndBlock.second.begin] = idAndBlock.first; + + for (auto& idAndBlock: m_blocks) + { + BasicBlock& block = idAndBlock.second; + switch (block.endType) + { + case BasicBlock::EndType::JUMPI: + case BasicBlock::EndType::HANDOVER: + assertThrow( + blockByBeginPos.count(block.end), + OptimizerException, + "Successor block not found." + ); + block.next = blockByBeginPos.at(block.end); + break; + default: + break; + } + } +} + +void ControlFlowGraph::removeUnusedBlocks() +{ + vector<BlockId> blocksToProcess{BlockId::initial()}; + set<BlockId> neededBlocks{BlockId::initial()}; + while (!blocksToProcess.empty()) + { + BasicBlock const& block = m_blocks.at(blocksToProcess.back()); + blocksToProcess.pop_back(); + for (BlockId tag: block.pushedTags) + if (!neededBlocks.count(tag) && m_blocks.count(tag)) + { + neededBlocks.insert(tag); + blocksToProcess.push_back(tag); + } + if (block.next && !neededBlocks.count(block.next)) + { + neededBlocks.insert(block.next); + blocksToProcess.push_back(block.next); + } + } + for (auto it = m_blocks.begin(); it != m_blocks.end();) + if (neededBlocks.count(it->first)) + ++it; + else + m_blocks.erase(it++); +} + +void ControlFlowGraph::setPrevLinks() +{ + for (auto& idAndBlock: m_blocks) + { + BasicBlock& block = idAndBlock.second; + switch (block.endType) + { + case BasicBlock::EndType::JUMPI: + case BasicBlock::EndType::HANDOVER: + assertThrow( + !m_blocks.at(block.next).prev, + OptimizerException, + "Successor already has predecessor." + ); + m_blocks[block.next].prev = idAndBlock.first; + break; + default: + break; + } + } + // If block ends with jump to not yet linked block, link them removing the jump + for (auto& idAndBlock: m_blocks) + { + BlockId blockId = idAndBlock.first; + BasicBlock& block = idAndBlock.second; + if (block.endType != BasicBlock::EndType::JUMP || block.end - block.begin < 2) + continue; + AssemblyItem const& push = m_items.at(block.end - 2); + if (push.type() != PushTag) + continue; + BlockId nextId(push.data()); + if (m_blocks.count(nextId) && m_blocks.at(nextId).prev) + continue; + bool hasLoop = false; + for (BlockId id = nextId; id && m_blocks.count(id) && !hasLoop; id = m_blocks.at(id).next) + hasLoop = (id == blockId); + if (hasLoop || !m_blocks.count(nextId)) + continue; + + m_blocks[nextId].prev = blockId; + block.next = nextId; + block.end -= 2; + assertThrow( + !block.pushedTags.empty() && block.pushedTags.back() == nextId, + OptimizerException, + "Last pushed tag not at end of pushed list." + ); + block.pushedTags.pop_back(); + block.endType = BasicBlock::EndType::HANDOVER; + } +} + +void ControlFlowGraph::gatherKnowledge() +{ + // @todo actually we know that memory is filled with zeros at the beginning, + // we could make use of that. + KnownStatePointer emptyState = make_shared<KnownState>(); + bool unknownJumpEncountered = false; + + struct WorkQueueItem { + BlockId blockId; + KnownStatePointer state; + set<BlockId> blocksSeen; + }; + + vector<WorkQueueItem> workQueue{WorkQueueItem{BlockId::initial(), emptyState->copy(), set<BlockId>()}}; + auto addWorkQueueItem = [&](WorkQueueItem const& _currentItem, BlockId _to, KnownStatePointer const& _state) + { + WorkQueueItem item; + item.blockId = _to; + item.state = _state->copy(); + item.blocksSeen = _currentItem.blocksSeen; + item.blocksSeen.insert(_currentItem.blockId); + workQueue.push_back(move(item)); + }; + + while (!workQueue.empty()) + { + WorkQueueItem item = move(workQueue.back()); + workQueue.pop_back(); + //@todo we might have to do something like incrementing the sequence number for each JUMPDEST + assertThrow(!!item.blockId, OptimizerException, ""); + if (!m_blocks.count(item.blockId)) + continue; // too bad, we do not know the tag, probably an invalid jump + BasicBlock& block = m_blocks.at(item.blockId); + KnownStatePointer state = item.state; + if (block.startState) + { + state->reduceToCommonKnowledge(*block.startState, !item.blocksSeen.count(item.blockId)); + if (*state == *block.startState) + continue; + } + + block.startState = state->copy(); + + // Feed all items except for the final jump yet because it will erase the target tag. + unsigned pc = block.begin; + while (pc < block.end && !SemanticInformation::altersControlFlow(m_items.at(pc))) + state->feedItem(m_items.at(pc++)); + + if ( + block.endType == BasicBlock::EndType::JUMP || + block.endType == BasicBlock::EndType::JUMPI + ) + { + assertThrow(block.begin <= pc && pc == block.end - 1, OptimizerException, ""); + //@todo in the case of JUMPI, add knowledge about the condition to the state + // (for both values of the condition) + set<u256> tags = state->tagsInExpression( + state->stackElement(state->stackHeight(), SourceLocation()) + ); + state->feedItem(m_items.at(pc++)); + + if (tags.empty()) + { + if (!unknownJumpEncountered) + { + // We do not know the target of this jump, so we have to reset the states of all + // JUMPDESTs. + unknownJumpEncountered = true; + for (auto const& it: m_blocks) + if (it.second.begin < it.second.end && m_items[it.second.begin].type() == Tag) + workQueue.push_back(WorkQueueItem{it.first, emptyState->copy(), set<BlockId>()}); + } + } + else + for (auto tag: tags) + addWorkQueueItem(item, BlockId(tag), state); + } + else if (block.begin <= pc && pc < block.end) + state->feedItem(m_items.at(pc++)); + assertThrow(block.end <= block.begin || pc == block.end, OptimizerException, ""); + + block.endState = state; + + if ( + block.endType == BasicBlock::EndType::HANDOVER || + block.endType == BasicBlock::EndType::JUMPI + ) + addWorkQueueItem(item, block.next, state); + } + + // Remove all blocks we never visited here. This might happen because a tag is pushed but + // never used for a JUMP. + // Note that this invalidates some contents of pushedTags + for (auto it = m_blocks.begin(); it != m_blocks.end();) + if (!it->second.startState) + it = m_blocks.erase(it); + else + it++; +} + +BasicBlocks ControlFlowGraph::rebuildCode() +{ + map<BlockId, unsigned> pushes; + for (auto& idAndBlock: m_blocks) + for (BlockId ref: idAndBlock.second.pushedTags) + if (m_blocks.count(ref)) + pushes[ref]++; + + set<BlockId> blocksToAdd; + for (auto it: m_blocks) + blocksToAdd.insert(it.first); + set<BlockId> blocksAdded; + BasicBlocks blocks; + + for ( + BlockId blockId = BlockId::initial(); + blockId; + blockId = blocksToAdd.empty() ? BlockId::invalid() : *blocksToAdd.begin() + ) + { + bool previousHandedOver = (blockId == BlockId::initial()); + while (m_blocks.at(blockId).prev) + blockId = m_blocks.at(blockId).prev; + for (; blockId; blockId = m_blocks.at(blockId).next) + { + BasicBlock& block = m_blocks.at(blockId); + blocksToAdd.erase(blockId); + blocksAdded.insert(blockId); + + if (block.begin == block.end) + continue; + // If block starts with unused tag, skip it. + if (previousHandedOver && !pushes[blockId] && m_items[block.begin].type() == Tag) + ++block.begin; + if (block.begin < block.end) + { + blocks.push_back(block); + blocks.back().startState->clearTagUnions(); + blocks.back().endState->clearTagUnions(); + } + previousHandedOver = (block.endType == BasicBlock::EndType::HANDOVER); + } + } + + return blocks; +} + +BlockId ControlFlowGraph::generateNewId() +{ + BlockId id = BlockId(++m_lastUsedId); + assertThrow(id < BlockId::initial(), OptimizerException, "Out of block IDs."); + return id; +} diff --git a/libevmasm/ControlFlowGraph.h b/libevmasm/ControlFlowGraph.h new file mode 100644 index 00000000..4480ba49 --- /dev/null +++ b/libevmasm/ControlFlowGraph.h @@ -0,0 +1,120 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file ControlFlowGraph.h + * @author Christian <c@ethdev.com> + * @date 2015 + * Control flow analysis for the optimizer. + */ + +#pragma once + +#include <vector> +#include <memory> +#include <libdevcore/Common.h> +#include <libdevcore/Assertions.h> +#include <libevmasm/ExpressionClasses.h> + +namespace dev +{ +namespace eth +{ + +class KnownState; +using KnownStatePointer = std::shared_ptr<KnownState>; + +/** + * Identifier for a block, coincides with the tag number of an AssemblyItem but adds a special + * ID for the inital block. + */ +class BlockId +{ +public: + BlockId() { *this = invalid(); } + explicit BlockId(unsigned _id): m_id(_id) {} + explicit BlockId(u256 const& _id); + static BlockId initial() { return BlockId(-2); } + static BlockId invalid() { return BlockId(-1); } + + bool operator==(BlockId const& _other) const { return m_id == _other.m_id; } + bool operator!=(BlockId const& _other) const { return m_id != _other.m_id; } + bool operator<(BlockId const& _other) const { return m_id < _other.m_id; } + explicit operator bool() const { return *this != invalid(); } + +private: + unsigned m_id; +}; + +/** + * Control flow block inside which instruction counter is always incremented by one + * (except for possibly the last instruction). + */ +struct BasicBlock +{ + /// Start index into assembly item list. + unsigned begin = 0; + /// End index (excluded) inte assembly item list. + unsigned end = 0; + /// Tags pushed inside this block, with multiplicity. + std::vector<BlockId> pushedTags; + /// ID of the block that always follows this one (either non-branching part of JUMPI or flow + /// into new block), or BlockId::invalid() otherwise + BlockId next = BlockId::invalid(); + /// ID of the block that has to precede this one (because control flows into it). + BlockId prev = BlockId::invalid(); + + enum class EndType { JUMP, JUMPI, STOP, HANDOVER }; + EndType endType = EndType::HANDOVER; + + /// Knowledge about the state when this block is entered. Intersection of all possible ways + /// to enter this block. + KnownStatePointer startState; + /// Knowledge about the state at the end of this block. + KnownStatePointer endState; +}; + +using BasicBlocks = std::vector<BasicBlock>; + +class ControlFlowGraph +{ +public: + /// Initializes the control flow graph. + /// @a _items has to persist across the usage of this class. + ControlFlowGraph(AssemblyItems const& _items): m_items(_items) {} + /// @returns vector of basic blocks in the order they should be used in the final code. + /// Should be called only once. + BasicBlocks optimisedBlocks(); + +private: + void findLargestTag(); + void splitBlocks(); + void resolveNextLinks(); + void removeUnusedBlocks(); + void gatherKnowledge(); + void setPrevLinks(); + BasicBlocks rebuildCode(); + + BlockId generateNewId(); + + unsigned m_lastUsedId = 0; + AssemblyItems const& m_items; + std::map<BlockId, BasicBlock> m_blocks; +}; + + +} +} diff --git a/libevmasm/Exceptions.h b/libevmasm/Exceptions.h new file mode 100644 index 00000000..03b8afde --- /dev/null +++ b/libevmasm/Exceptions.h @@ -0,0 +1,37 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Exceptions.h + * @author Christian <c@ethdev.com> + * @date 2014 + */ + +#pragma once + +#include <libdevcore/Exceptions.h> + +namespace dev +{ +namespace eth +{ + +struct AssemblyException: virtual Exception {}; +struct OptimizerException: virtual AssemblyException {}; +struct StackTooDeepException: virtual OptimizerException {}; +struct ItemNotAvailableException: virtual OptimizerException {}; + +} +} diff --git a/libevmasm/ExpressionClasses.cpp b/libevmasm/ExpressionClasses.cpp new file mode 100644 index 00000000..9d13a57a --- /dev/null +++ b/libevmasm/ExpressionClasses.cpp @@ -0,0 +1,511 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file ExpressionClasses.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + * Container for equivalence classes of expressions for use in common subexpression elimination. + */ + +#include <libevmasm/ExpressionClasses.h> +#include <utility> +#include <tuple> +#include <functional> +#include <boost/range/adaptor/reversed.hpp> +#include <boost/noncopyable.hpp> +#include <libevmasm/Assembly.h> +#include <libevmasm/CommonSubexpressionEliminator.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + + +bool ExpressionClasses::Expression::operator<(ExpressionClasses::Expression const& _other) const +{ + assertThrow(!!item && !!_other.item, OptimizerException, ""); + auto type = item->type(); + auto otherType = _other.item->type(); + return std::tie(type, item->data(), arguments, sequenceNumber) < + std::tie(otherType, _other.item->data(), _other.arguments, _other.sequenceNumber); +} + +ExpressionClasses::Id ExpressionClasses::find( + AssemblyItem const& _item, + Ids const& _arguments, + bool _copyItem, + unsigned _sequenceNumber +) +{ + Expression exp; + exp.id = Id(-1); + exp.item = &_item; + exp.arguments = _arguments; + exp.sequenceNumber = _sequenceNumber; + + if (SemanticInformation::isCommutativeOperation(_item)) + sort(exp.arguments.begin(), exp.arguments.end()); + + if (SemanticInformation::isDeterministic(_item)) + { + auto it = m_expressions.find(exp); + if (it != m_expressions.end()) + return it->id; + } + + if (_copyItem) + exp.item = storeItem(_item); + + ExpressionClasses::Id id = tryToSimplify(exp); + if (id < m_representatives.size()) + exp.id = id; + else + { + exp.id = m_representatives.size(); + m_representatives.push_back(exp); + } + m_expressions.insert(exp); + return exp.id; +} + +void ExpressionClasses::forceEqual( + ExpressionClasses::Id _id, + AssemblyItem const& _item, + ExpressionClasses::Ids const& _arguments, + bool _copyItem +) +{ + Expression exp; + exp.id = _id; + exp.item = &_item; + exp.arguments = _arguments; + + if (SemanticInformation::isCommutativeOperation(_item)) + sort(exp.arguments.begin(), exp.arguments.end()); + + if (_copyItem) + exp.item = storeItem(_item); + + m_expressions.insert(exp); +} + +ExpressionClasses::Id ExpressionClasses::newClass(SourceLocation const& _location) +{ + Expression exp; + exp.id = m_representatives.size(); + exp.item = storeItem(AssemblyItem(UndefinedItem, (u256(1) << 255) + exp.id, _location)); + m_representatives.push_back(exp); + m_expressions.insert(exp); + return exp.id; +} + +bool ExpressionClasses::knownToBeDifferent(ExpressionClasses::Id _a, ExpressionClasses::Id _b) +{ + // Try to simplify "_a - _b" and return true iff the value is a non-zero constant. + return knownNonZero(find(Instruction::SUB, {_a, _b})); +} + +bool ExpressionClasses::knownToBeDifferentBy32(ExpressionClasses::Id _a, ExpressionClasses::Id _b) +{ + // Try to simplify "_a - _b" and return true iff the value is at least 32 away from zero. + u256 const* v = knownConstant(find(Instruction::SUB, {_a, _b})); + // forbidden interval is ["-31", 31] + return v && *v + 31 > u256(62); +} + +bool ExpressionClasses::knownZero(Id _c) +{ + return Pattern(u256(0)).matches(representative(_c), *this); +} + +bool ExpressionClasses::knownNonZero(Id _c) +{ + return Pattern(u256(0)).matches(representative(find(Instruction::ISZERO, {_c})), *this); +} + +u256 const* ExpressionClasses::knownConstant(Id _c) +{ + map<unsigned, Expression const*> matchGroups; + Pattern constant(Push); + constant.setMatchGroup(1, matchGroups); + if (!constant.matches(representative(_c), *this)) + return nullptr; + return &constant.d(); +} + +AssemblyItem const* ExpressionClasses::storeItem(AssemblyItem const& _item) +{ + m_spareAssemblyItems.push_back(make_shared<AssemblyItem>(_item)); + return m_spareAssemblyItems.back().get(); +} + +string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) const +{ + Expression const& expr = representative(_id); + stringstream str; + str << dec << expr.id << ":"; + if (expr.item) + { + str << *expr.item << "("; + for (Id arg: expr.arguments) + str << fullDAGToString(arg) << ","; + str << ")"; + } + else + str << " UNIQUE"; + return str.str(); +} + +class Rules: public boost::noncopyable +{ +public: + Rules(); + void resetMatchGroups() { m_matchGroups.clear(); } + vector<pair<Pattern, function<Pattern()>>> rules() const { return m_rules; } + +private: + using Expression = ExpressionClasses::Expression; + map<unsigned, Expression const*> m_matchGroups; + vector<pair<Pattern, function<Pattern()>>> m_rules; +}; + +template <class S> S divWorkaround(S const& _a, S const& _b) +{ + return (S)(bigint(_a) / bigint(_b)); +} + +template <class S> S modWorkaround(S const& _a, S const& _b) +{ + return (S)(bigint(_a) % bigint(_b)); +} + +Rules::Rules() +{ + // Multiple occurences of one of these inside one rule must match the same equivalence class. + // Constants. + Pattern A(Push); + Pattern B(Push); + Pattern C(Push); + // Anything. + Pattern X; + Pattern Y; + Pattern Z; + A.setMatchGroup(1, m_matchGroups); + B.setMatchGroup(2, m_matchGroups); + C.setMatchGroup(3, m_matchGroups); + X.setMatchGroup(4, m_matchGroups); + Y.setMatchGroup(5, m_matchGroups); + Z.setMatchGroup(6, m_matchGroups); + + m_rules = vector<pair<Pattern, function<Pattern()>>>{ + // arithmetics on constants + {{Instruction::ADD, {A, B}}, [=]{ return A.d() + B.d(); }}, + {{Instruction::MUL, {A, B}}, [=]{ return A.d() * B.d(); }}, + {{Instruction::SUB, {A, B}}, [=]{ return A.d() - B.d(); }}, + {{Instruction::DIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : divWorkaround(A.d(), B.d()); }}, + {{Instruction::SDIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(divWorkaround(u2s(A.d()), u2s(B.d()))); }}, + {{Instruction::MOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : modWorkaround(A.d(), B.d()); }}, + {{Instruction::SMOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(modWorkaround(u2s(A.d()), u2s(B.d()))); }}, + {{Instruction::EXP, {A, B}}, [=]{ return u256(boost::multiprecision::powm(bigint(A.d()), bigint(B.d()), bigint(1) << 256)); }}, + {{Instruction::NOT, {A}}, [=]{ return ~A.d(); }}, + {{Instruction::LT, {A, B}}, [=]() { return A.d() < B.d() ? u256(1) : 0; }}, + {{Instruction::GT, {A, B}}, [=]() -> u256 { return A.d() > B.d() ? 1 : 0; }}, + {{Instruction::SLT, {A, B}}, [=]() -> u256 { return u2s(A.d()) < u2s(B.d()) ? 1 : 0; }}, + {{Instruction::SGT, {A, B}}, [=]() -> u256 { return u2s(A.d()) > u2s(B.d()) ? 1 : 0; }}, + {{Instruction::EQ, {A, B}}, [=]() -> u256 { return A.d() == B.d() ? 1 : 0; }}, + {{Instruction::ISZERO, {A}}, [=]() -> u256 { return A.d() == 0 ? 1 : 0; }}, + {{Instruction::AND, {A, B}}, [=]{ return A.d() & B.d(); }}, + {{Instruction::OR, {A, B}}, [=]{ return A.d() | B.d(); }}, + {{Instruction::XOR, {A, B}}, [=]{ return A.d() ^ B.d(); }}, + {{Instruction::BYTE, {A, B}}, [=]{ return A.d() >= 32 ? 0 : (B.d() >> unsigned(8 * (31 - A.d()))) & 0xff; }}, + {{Instruction::ADDMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) + bigint(B.d())) % C.d()); }}, + {{Instruction::MULMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) * bigint(B.d())) % C.d()); }}, + {{Instruction::MULMOD, {A, B, C}}, [=]{ return A.d() * B.d(); }}, + {{Instruction::SIGNEXTEND, {A, B}}, [=]() -> u256 { + if (A.d() >= 31) + return B.d(); + unsigned testBit = unsigned(A.d()) * 8 + 7; + u256 mask = (u256(1) << testBit) - 1; + return u256(boost::multiprecision::bit_test(B.d(), testBit) ? B.d() | ~mask : B.d() & mask); + }}, + + // invariants involving known constants + {{Instruction::ADD, {X, 0}}, [=]{ return X; }}, + {{Instruction::MUL, {X, 1}}, [=]{ return X; }}, + {{Instruction::DIV, {X, 1}}, [=]{ return X; }}, + {{Instruction::SDIV, {X, 1}}, [=]{ return X; }}, + {{Instruction::OR, {X, 0}}, [=]{ return X; }}, + {{Instruction::XOR, {X, 0}}, [=]{ return X; }}, + {{Instruction::AND, {X, ~u256(0)}}, [=]{ return X; }}, + {{Instruction::MUL, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::DIV, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {0, X}}, [=]{ return u256(0); }}, + {{Instruction::AND, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::OR, {X, ~u256(0)}}, [=]{ return ~u256(0); }}, + // operations involving an expression and itself + {{Instruction::AND, {X, X}}, [=]{ return X; }}, + {{Instruction::OR, {X, X}}, [=]{ return X; }}, + {{Instruction::SUB, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::EQ, {X, X}}, [=]{ return u256(1); }}, + {{Instruction::LT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SLT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::GT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SGT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {X, X}}, [=]{ return u256(0); }}, + + {{Instruction::NOT, {{Instruction::NOT, {X}}}}, [=]{ return X; }}, + }; + // Double negation of opcodes with binary result + for (auto const& op: vector<Instruction>{ + Instruction::EQ, + Instruction::LT, + Instruction::SLT, + Instruction::GT, + Instruction::SGT + }) + m_rules.push_back({ + {Instruction::ISZERO, {{Instruction::ISZERO, {{op, {X, Y}}}}}}, + [=]() -> Pattern { return {op, {X, Y}}; } + }); + m_rules.push_back({ + {Instruction::ISZERO, {{Instruction::ISZERO, {{Instruction::ISZERO, {X}}}}}}, + [=]() -> Pattern { return {Instruction::ISZERO, {X}}; } + }); + // Associative operations + for (auto const& opFun: vector<pair<Instruction,function<u256(u256 const&,u256 const&)>>>{ + {Instruction::ADD, plus<u256>()}, + {Instruction::MUL, multiplies<u256>()}, + {Instruction::AND, bit_and<u256>()}, + {Instruction::OR, bit_or<u256>()}, + {Instruction::XOR, bit_xor<u256>()} + }) + { + auto op = opFun.first; + auto fun = opFun.second; + // Moving constants to the outside, order matters here! + // we need actions that return expressions (or patterns?) here, and we need also reversed rules + // (X+A)+B -> X+(A+B) + m_rules += vector<pair<Pattern, function<Pattern()>>>{{ + {op, {{op, {X, A}}, B}}, + [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } + }, { + // X+(Y+A) -> (X+Y)+A + {op, {{op, {X, A}}, Y}}, + [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } + }, { + // For now, we still need explicit commutativity for the inner pattern + {op, {{op, {A, X}}, B}}, + [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } + }, { + {op, {{op, {A, X}}, Y}}, + [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } + }}; + } + // move constants across subtractions + m_rules += vector<pair<Pattern, function<Pattern()>>>{ + { + // X - A -> X + (-A) + {Instruction::SUB, {X, A}}, + [=]() -> Pattern { return {Instruction::ADD, {X, 0 - A.d()}}; } + }, { + // (X + A) - Y -> (X - Y) + A + {Instruction::SUB, {{Instruction::ADD, {X, A}}, Y}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } + }, { + // (A + X) - Y -> (X - Y) + A + {Instruction::SUB, {{Instruction::ADD, {A, X}}, Y}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } + }, { + // X - (Y + A) -> (X - Y) + (-A) + {Instruction::SUB, {X, {Instruction::ADD, {Y, A}}}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } + }, { + // X - (A + Y) -> (X - Y) + (-A) + {Instruction::SUB, {X, {Instruction::ADD, {A, Y}}}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } + } + }; +} + +ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, bool _secondRun) +{ + static Rules rules; + + if ( + !_expr.item || + _expr.item->type() != Operation || + !SemanticInformation::isDeterministic(*_expr.item) + ) + return -1; + + for (auto const& rule: rules.rules()) + { + rules.resetMatchGroups(); + if (rule.first.matches(_expr, *this)) + { + // Debug info + //cout << "Simplifying " << *_expr.item << "("; + //for (Id arg: _expr.arguments) + // cout << fullDAGToString(arg) << ", "; + //cout << ")" << endl; + //cout << "with rule " << rule.first.toString() << endl; + //ExpressionTemplate t(rule.second()); + //cout << "to " << rule.second().toString() << endl; + return rebuildExpression(ExpressionTemplate(rule.second(), _expr.item->location())); + } + } + + if (!_secondRun && _expr.arguments.size() == 2 && SemanticInformation::isCommutativeOperation(*_expr.item)) + { + Expression expr = _expr; + swap(expr.arguments[0], expr.arguments[1]); + return tryToSimplify(expr, true); + } + + return -1; +} + +ExpressionClasses::Id ExpressionClasses::rebuildExpression(ExpressionTemplate const& _template) +{ + if (_template.hasId) + return _template.id; + + Ids arguments; + for (ExpressionTemplate const& t: _template.arguments) + arguments.push_back(rebuildExpression(t)); + return find(_template.item, arguments); +} + + +Pattern::Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments): + m_type(Operation), + m_requireDataMatch(true), + m_data(_instruction), + m_arguments(_arguments) +{ +} + +void Pattern::setMatchGroup(unsigned _group, map<unsigned, Expression const*>& _matchGroups) +{ + m_matchGroup = _group; + m_matchGroups = &_matchGroups; +} + +bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const +{ + if (!matchesBaseItem(_expr.item)) + return false; + if (m_matchGroup) + { + if (!m_matchGroups->count(m_matchGroup)) + (*m_matchGroups)[m_matchGroup] = &_expr; + else if ((*m_matchGroups)[m_matchGroup]->id != _expr.id) + return false; + } + assertThrow(m_arguments.size() == 0 || _expr.arguments.size() == m_arguments.size(), OptimizerException, ""); + for (size_t i = 0; i < m_arguments.size(); ++i) + if (!m_arguments[i].matches(_classes.representative(_expr.arguments[i]), _classes)) + return false; + return true; +} + +AssemblyItem Pattern::toAssemblyItem(SourceLocation const& _location) const +{ + return AssemblyItem(m_type, m_data, _location); +} + +string Pattern::toString() const +{ + stringstream s; + switch (m_type) + { + case Operation: + s << instructionInfo(Instruction(unsigned(m_data))).name; + break; + case Push: + s << "PUSH " << hex << m_data; + break; + case UndefinedItem: + s << "ANY"; + break; + default: + s << "t=" << dec << m_type << " d=" << hex << m_data; + break; + } + if (!m_requireDataMatch) + s << " ~"; + if (m_matchGroup) + s << "[" << dec << m_matchGroup << "]"; + s << "("; + for (Pattern const& p: m_arguments) + s << p.toString() << ", "; + s << ")"; + return s.str(); +} + +bool Pattern::matchesBaseItem(AssemblyItem const* _item) const +{ + if (m_type == UndefinedItem) + return true; + if (!_item) + return false; + if (m_type != _item->type()) + return false; + if (m_requireDataMatch && m_data != _item->data()) + return false; + return true; +} + +Pattern::Expression const& Pattern::matchGroupValue() const +{ + assertThrow(m_matchGroup > 0, OptimizerException, ""); + assertThrow(!!m_matchGroups, OptimizerException, ""); + assertThrow((*m_matchGroups)[m_matchGroup], OptimizerException, ""); + return *(*m_matchGroups)[m_matchGroup]; +} + + +ExpressionTemplate::ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location) +{ + if (_pattern.matchGroup()) + { + hasId = true; + id = _pattern.id(); + } + else + { + hasId = false; + item = _pattern.toAssemblyItem(_location); + } + for (auto const& arg: _pattern.arguments()) + arguments.push_back(ExpressionTemplate(arg, _location)); +} + +string ExpressionTemplate::toString() const +{ + stringstream s; + if (hasId) + s << id; + else + s << item; + s << "("; + for (auto const& arg: arguments) + s << arg.toString(); + s << ")"; + return s.str(); +} diff --git a/libevmasm/ExpressionClasses.h b/libevmasm/ExpressionClasses.h new file mode 100644 index 00000000..4bfd7d24 --- /dev/null +++ b/libevmasm/ExpressionClasses.h @@ -0,0 +1,190 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file ExpressionClasses.h + * @author Christian <c@ethdev.com> + * @date 2015 + * Container for equivalence classes of expressions for use in common subexpression elimination. + */ + +#pragma once + +#include <vector> +#include <map> +#include <memory> +#include <libdevcore/Common.h> +#include <libevmasm/AssemblyItem.h> + +namespace dev +{ +namespace eth +{ + +class Pattern; +struct ExpressionTemplate; + +/** + * Collection of classes of equivalent expressions that can also determine the class of an expression. + * Identifiers are contiguously assigned to new classes starting from zero. + */ +class ExpressionClasses +{ +public: + using Id = unsigned; + using Ids = std::vector<Id>; + + struct Expression + { + Id id; + AssemblyItem const* item = nullptr; + Ids arguments; + /// Storage modification sequence, only used for storage and memory operations. + unsigned sequenceNumber = 0; + /// Behaves as if this was a tuple of (item->type(), item->data(), arguments, sequenceNumber). + bool operator<(Expression const& _other) const; + }; + + /// Retrieves the id of the expression equivalence class resulting from the given item applied to the + /// given classes, might also create a new one. + /// @param _copyItem if true, copies the assembly item to an internal storage instead of just + /// keeping a pointer. + /// The @a _sequenceNumber indicates the current storage or memory access sequence. + Id find( + AssemblyItem const& _item, + Ids const& _arguments = {}, + bool _copyItem = true, + unsigned _sequenceNumber = 0 + ); + /// @returns the canonical representative of an expression class. + Expression const& representative(Id _id) const { return m_representatives.at(_id); } + /// @returns the number of classes. + Id size() const { return m_representatives.size(); } + + /// Forces the given @a _item with @a _arguments to the class @a _id. This can be used to + /// add prior knowledge e.g. about CALLDATA, but has to be used with caution. Will not work as + /// expected if @a _item applied to @a _arguments already exists. + void forceEqual(Id _id, AssemblyItem const& _item, Ids const& _arguments, bool _copyItem = true); + + /// @returns the id of a new class which is different to all other classes. + Id newClass(SourceLocation const& _location); + + /// @returns true if the values of the given classes are known to be different (on every input). + /// @note that this function might still return false for some different inputs. + bool knownToBeDifferent(Id _a, Id _b); + /// Similar to @a knownToBeDifferent but require that abs(_a - b) >= 32. + bool knownToBeDifferentBy32(Id _a, Id _b); + /// @returns true if the value of the given class is known to be zero. + /// @note that this is not the negation of knownNonZero + bool knownZero(Id _c); + /// @returns true if the value of the given class is known to be nonzero. + /// @note that this is not the negation of knownZero + bool knownNonZero(Id _c); + /// @returns a pointer to the value if the given class is known to be a constant, + /// and a nullptr otherwise. + u256 const* knownConstant(Id _c); + + /// Stores a copy of the given AssemblyItem and returns a pointer to the copy that is valid for + /// the lifetime of the ExpressionClasses object. + AssemblyItem const* storeItem(AssemblyItem const& _item); + + std::string fullDAGToString(Id _id) const; + +private: + /// Tries to simplify the given expression. + /// @returns its class if it possible or Id(-1) otherwise. + /// @param _secondRun is set to true for the second run where arguments of commutative expressions are reversed + Id tryToSimplify(Expression const& _expr, bool _secondRun = false); + + /// Rebuilds an expression from a (matched) pattern. + Id rebuildExpression(ExpressionTemplate const& _template); + + std::vector<std::pair<Pattern, std::function<Pattern()>>> createRules() const; + + /// Expression equivalence class representatives - we only store one item of an equivalence. + std::vector<Expression> m_representatives; + /// All expression ever encountered. + std::set<Expression> m_expressions; + std::vector<std::shared_ptr<AssemblyItem>> m_spareAssemblyItems; +}; + +/** + * Pattern to match against an expression. + * Also stores matched expressions to retrieve them later, for constructing new expressions using + * ExpressionTemplate. + */ +class Pattern +{ +public: + using Expression = ExpressionClasses::Expression; + using Id = ExpressionClasses::Id; + + // Matches a specific constant value. + Pattern(unsigned _value): Pattern(u256(_value)) {} + // Matches a specific constant value. + Pattern(u256 const& _value): m_type(Push), m_requireDataMatch(true), m_data(_value) {} + // Matches a specific assembly item type or anything if not given. + Pattern(AssemblyItemType _type = UndefinedItem): m_type(_type) {} + // Matches a given instruction with given arguments + Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments = {}); + /// Sets this pattern to be part of the match group with the identifier @a _group. + /// Inside one rule, all patterns in the same match group have to match expressions from the + /// same expression equivalence class. + void setMatchGroup(unsigned _group, std::map<unsigned, Expression const*>& _matchGroups); + unsigned matchGroup() const { return m_matchGroup; } + bool matches(Expression const& _expr, ExpressionClasses const& _classes) const; + + AssemblyItem toAssemblyItem(SourceLocation const& _location) const; + std::vector<Pattern> arguments() const { return m_arguments; } + + /// @returns the id of the matched expression if this pattern is part of a match group. + Id id() const { return matchGroupValue().id; } + /// @returns the data of the matched expression if this pattern is part of a match group. + u256 const& d() const { return matchGroupValue().item->data(); } + + std::string toString() const; + +private: + bool matchesBaseItem(AssemblyItem const* _item) const; + Expression const& matchGroupValue() const; + + AssemblyItemType m_type; + bool m_requireDataMatch = false; + u256 m_data = 0; + std::vector<Pattern> m_arguments; + unsigned m_matchGroup = 0; + std::map<unsigned, Expression const*>* m_matchGroups = nullptr; +}; + +/** + * Template for a new expression that can be built from matched patterns. + */ +struct ExpressionTemplate +{ + using Expression = ExpressionClasses::Expression; + using Id = ExpressionClasses::Id; + explicit ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location); + std::string toString() const; + bool hasId = false; + /// Id of the matched expression, if available. + Id id = Id(-1); + // Otherwise, assembly item. + AssemblyItem item = UndefinedItem; + std::vector<ExpressionTemplate> arguments; +}; + +} +} diff --git a/libevmasm/GasMeter.cpp b/libevmasm/GasMeter.cpp new file mode 100644 index 00000000..93583169 --- /dev/null +++ b/libevmasm/GasMeter.cpp @@ -0,0 +1,217 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file GasMeter.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + */ + +#include "GasMeter.h" +#include <libevmasm/KnownState.h> +using namespace std; +using namespace dev; +using namespace dev::eth; + +GasMeter::GasConsumption& GasMeter::GasConsumption::operator+=(GasConsumption const& _other) +{ + if (_other.isInfinite && !isInfinite) + *this = infinite(); + if (isInfinite) + return *this; + bigint v = bigint(value) + _other.value; + if (v > numeric_limits<u256>::max()) + *this = infinite(); + else + value = u256(v); + return *this; +} + +GasMeter::GasConsumption GasMeter::estimateMax(AssemblyItem const& _item) +{ + GasConsumption gas; + switch (_item.type()) + { + case Push: + case PushTag: + case PushData: + case PushString: + case PushSub: + case PushSubSize: + case PushProgramSize: + case PushLibraryAddress: + gas = runGas(Instruction::PUSH1); + break; + case Tag: + gas = runGas(Instruction::JUMPDEST); + break; + case Operation: + { + ExpressionClasses& classes = m_state->expressionClasses(); + gas = runGas(_item.instruction()); + switch (_item.instruction()) + { + case Instruction::SSTORE: + { + ExpressionClasses::Id slot = m_state->relativeStackElement(0); + ExpressionClasses::Id value = m_state->relativeStackElement(-1); + if (classes.knownZero(value) || ( + m_state->storageContent().count(slot) && + classes.knownNonZero(m_state->storageContent().at(slot)) + )) + gas += m_schedule.sstoreResetGas; //@todo take refunds into account + else + gas += m_schedule.sstoreSetGas; + break; + } + case Instruction::SLOAD: + gas += m_schedule.sloadGas; + break; + case Instruction::RETURN: + gas += memoryGas(0, -1); + break; + case Instruction::MLOAD: + case Instruction::MSTORE: + gas += memoryGas(classes.find(eth::Instruction::ADD, { + m_state->relativeStackElement(0), + classes.find(AssemblyItem(32)) + })); + break; + case Instruction::MSTORE8: + gas += memoryGas(classes.find(eth::Instruction::ADD, { + m_state->relativeStackElement(0), + classes.find(AssemblyItem(1)) + })); + break; + case Instruction::SHA3: + gas = m_schedule.sha3Gas; + gas += wordGas(m_schedule.sha3WordGas, m_state->relativeStackElement(-1)); + gas += memoryGas(0, -1); + break; + case Instruction::CALLDATACOPY: + case Instruction::CODECOPY: + gas += memoryGas(0, -2); + gas += wordGas(m_schedule.copyGas, m_state->relativeStackElement(-2)); + break; + case Instruction::EXTCODECOPY: + gas += memoryGas(-1, -3); + gas += wordGas(m_schedule.copyGas, m_state->relativeStackElement(-3)); + break; + case Instruction::LOG0: + case Instruction::LOG1: + case Instruction::LOG2: + case Instruction::LOG3: + case Instruction::LOG4: + { + unsigned n = unsigned(_item.instruction()) - unsigned(Instruction::LOG0); + gas = m_schedule.logGas + m_schedule.logTopicGas * n; + gas += memoryGas(0, -1); + if (u256 const* value = classes.knownConstant(m_state->relativeStackElement(-1))) + gas += m_schedule.logDataGas * (*value); + else + gas = GasConsumption::infinite(); + break; + } + case Instruction::CALL: + case Instruction::CALLCODE: + case Instruction::DELEGATECALL: + { + gas = m_schedule.callGas; + if (u256 const* value = classes.knownConstant(m_state->relativeStackElement(0))) + gas += (*value); + else + gas = GasConsumption::infinite(); + if (_item.instruction() == Instruction::CALL) + gas += m_schedule.callNewAccountGas; // We very rarely know whether the address exists. + int valueSize = _item.instruction() == Instruction::DELEGATECALL ? 0 : 1; + if (!classes.knownZero(m_state->relativeStackElement(-1 - valueSize))) + gas += m_schedule.callValueTransferGas; + gas += memoryGas(-2 - valueSize, -3 - valueSize); + gas += memoryGas(-4 - valueSize, -5 - valueSize); + break; + } + case Instruction::CREATE: + gas = m_schedule.createGas; + gas += memoryGas(-1, -2); + break; + case Instruction::EXP: + gas = m_schedule.expGas; + if (u256 const* value = classes.knownConstant(m_state->relativeStackElement(-1))) + gas += m_schedule.expByteGas * (32 - (h256(*value).firstBitSet() / 8)); + else + gas += m_schedule.expByteGas * 32; + break; + default: + break; + } + break; + } + default: + gas = GasConsumption::infinite(); + break; + } + + m_state->feedItem(_item); + return gas; +} + +GasMeter::GasConsumption GasMeter::wordGas(u256 const& _multiplier, ExpressionClasses::Id _position) +{ + u256 const* value = m_state->expressionClasses().knownConstant(_position); + if (!value) + return GasConsumption::infinite(); + return GasConsumption(_multiplier * ((*value + 31) / 32)); +} + +GasMeter::GasConsumption GasMeter::memoryGas(ExpressionClasses::Id _position) +{ + u256 const* value = m_state->expressionClasses().knownConstant(_position); + if (!value) + return GasConsumption::infinite(); + if (*value < m_largestMemoryAccess) + return GasConsumption(u256(0)); + u256 previous = m_largestMemoryAccess; + m_largestMemoryAccess = *value; + auto memGas = [=](u256 const& pos) -> u256 + { + u256 size = (pos + 31) / 32; + return m_schedule.memoryGas * size + size * size / m_schedule.quadCoeffDiv; + }; + return memGas(*value) - memGas(previous); +} + +GasMeter::GasConsumption GasMeter::memoryGas(int _stackPosOffset, int _stackPosSize) +{ + ExpressionClasses& classes = m_state->expressionClasses(); + if (classes.knownZero(m_state->relativeStackElement(_stackPosSize))) + return GasConsumption(0); + else + return memoryGas(classes.find(eth::Instruction::ADD, { + m_state->relativeStackElement(_stackPosOffset), + m_state->relativeStackElement(_stackPosSize) + })); +} + +u256 GasMeter::runGas(Instruction _instruction, EVMSchedule const& _es) +{ + if (_instruction == Instruction::JUMPDEST) + return 1; + + int tier = instructionInfo(_instruction).gasPriceTier; + assertThrow(tier != InvalidTier, OptimizerException, "Invalid gas tier."); + return _es.tierStepGas[tier]; +} + + diff --git a/libevmasm/GasMeter.h b/libevmasm/GasMeter.h new file mode 100644 index 00000000..b11a63a5 --- /dev/null +++ b/libevmasm/GasMeter.h @@ -0,0 +1,102 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file GasMeter.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + */ + +#pragma once + +#include <ostream> +#include <tuple> +#include <libevmasm/ExpressionClasses.h> +#include <libevmasm/AssemblyItem.h> +#include <libethcore/ChainOperationParams.h> + +namespace dev +{ +namespace eth +{ + +class KnownState; + +// TODO: FIXME: HOMESTEAD: XXX: @chfast populate m_schedule from an ExtVMFace instance via ExtVMFace::evmSchedule. + +/** + * Class that helps computing the maximum gas consumption for instructions. + * Has to be initialized with a certain known state that will be automatically updated for + * each call to estimateMax. These calls have to supply strictly subsequent AssemblyItems. + * A new gas meter has to be constructed (with a new state) for control flow changes. + */ +class GasMeter +{ +public: + struct GasConsumption + { + GasConsumption(u256 _value = 0, bool _infinite = false): value(_value), isInfinite(_infinite) {} + static GasConsumption infinite() { return GasConsumption(0, true); } + + GasConsumption& operator+=(GasConsumption const& _other); + bool operator<(GasConsumption const& _other) const { return this->tuple() < _other.tuple(); } + + std::tuple<bool const&, u256 const&> tuple() const { return std::tie(isInfinite, value); } + + u256 value; + bool isInfinite; + }; + + /// Constructs a new gas meter given the current state. + explicit GasMeter(std::shared_ptr<KnownState> const& _state, u256 const& _largestMemoryAccess = 0): + m_state(_state), m_largestMemoryAccess(_largestMemoryAccess) {} + + /// @returns an upper bound on the gas consumed by the given instruction and updates + /// the state. + GasConsumption estimateMax(AssemblyItem const& _item); + + u256 const& largestMemoryAccess() const { return m_largestMemoryAccess; } + + u256 runGas(Instruction _instruction) const { return runGas(_instruction, m_schedule); } + static u256 runGas(Instruction _instruction, EVMSchedule const& _es); + +private: + /// @returns _multiplier * (_value + 31) / 32, if _value is a known constant and infinite otherwise. + GasConsumption wordGas(u256 const& _multiplier, ExpressionClasses::Id _value); + /// @returns the gas needed to access the given memory position. + /// @todo this assumes that memory was never accessed before and thus over-estimates gas usage. + GasConsumption memoryGas(ExpressionClasses::Id _position); + /// @returns the memory gas for accessing the memory at a specific offset for a number of bytes + /// given as values on the stack at the given relative positions. + GasConsumption memoryGas(int _stackPosOffset, int _stackPosSize); + + std::shared_ptr<KnownState> m_state; + /// Largest point where memory was accessed since the creation of this object. + u256 m_largestMemoryAccess; + + EVMSchedule m_schedule; +}; + +inline std::ostream& operator<<(std::ostream& _str, GasMeter::GasConsumption const& _consumption) +{ + if (_consumption.isInfinite) + return _str << "[???]"; + else + return _str << std::dec << _consumption.value; +} + + +} +} diff --git a/libevmasm/KnownState.cpp b/libevmasm/KnownState.cpp new file mode 100644 index 00000000..dd269ff4 --- /dev/null +++ b/libevmasm/KnownState.cpp @@ -0,0 +1,411 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file KnownState.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + * Contains knowledge about the state of the virtual machine at a specific instruction. + */ + +#include "KnownState.h" +#include <functional> +#include <libdevcore/SHA3.h> +#include <libevmasm/AssemblyItem.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + +ostream& KnownState::stream(ostream& _out) const +{ + auto streamExpressionClass = [this](ostream& _out, Id _id) + { + auto const& expr = m_expressionClasses->representative(_id); + _out << " " << dec << _id << ": "; + if (!expr.item) + _out << " no item"; + else if (expr.item->type() == UndefinedItem) + _out << " unknown " << int(expr.item->data()); + else + _out << *expr.item; + if (expr.sequenceNumber) + _out << "@" << dec << expr.sequenceNumber; + _out << "("; + for (Id arg: expr.arguments) + _out << dec << arg << ","; + _out << ")" << endl; + }; + + _out << "=== State ===" << endl; + _out << "Stack height: " << dec << m_stackHeight << endl; + _out << "Equivalence classes: " << endl; + for (Id eqClass = 0; eqClass < m_expressionClasses->size(); ++eqClass) + streamExpressionClass(_out, eqClass); + + _out << "Stack: " << endl; + for (auto const& it: m_stackElements) + { + _out << " " << dec << it.first << ": "; + streamExpressionClass(_out, it.second); + } + _out << "Storage: " << endl; + for (auto const& it: m_storageContent) + { + _out << " "; + streamExpressionClass(_out, it.first); + _out << ": "; + streamExpressionClass(_out, it.second); + } + _out << "Memory: " << endl; + for (auto const& it: m_memoryContent) + { + _out << " "; + streamExpressionClass(_out, it.first); + _out << ": "; + streamExpressionClass(_out, it.second); + } + + return _out; +} + +KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool _copyItem) +{ + StoreOperation op; + if (_item.type() == Tag) + { + // can be ignored + } + else if (_item.type() != Operation) + { + assertThrow(_item.deposit() == 1, InvalidDeposit, ""); + if (_item.pushedValue()) + // only available after assembly stage, should not be used for optimisation + setStackElement(++m_stackHeight, m_expressionClasses->find(*_item.pushedValue())); + else + setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem)); + } + else + { + Instruction instruction = _item.instruction(); + InstructionInfo info = instructionInfo(instruction); + if (SemanticInformation::isDupInstruction(_item)) + setStackElement( + m_stackHeight + 1, + stackElement( + m_stackHeight - int(instruction) + int(Instruction::DUP1), + _item.location() + ) + ); + else if (SemanticInformation::isSwapInstruction(_item)) + swapStackElements( + m_stackHeight, + m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1), + _item.location() + ); + else if (instruction != Instruction::POP) + { + vector<Id> arguments(info.args); + for (int i = 0; i < info.args; ++i) + arguments[i] = stackElement(m_stackHeight - i, _item.location()); + + if (_item.instruction() == Instruction::SSTORE) + op = storeInStorage(arguments[0], arguments[1], _item.location()); + else if (_item.instruction() == Instruction::SLOAD) + setStackElement( + m_stackHeight + _item.deposit(), + loadFromStorage(arguments[0], _item.location()) + ); + else if (_item.instruction() == Instruction::MSTORE) + op = storeInMemory(arguments[0], arguments[1], _item.location()); + else if (_item.instruction() == Instruction::MLOAD) + setStackElement( + m_stackHeight + _item.deposit(), + loadFromMemory(arguments[0], _item.location()) + ); + else if (_item.instruction() == Instruction::SHA3) + setStackElement( + m_stackHeight + _item.deposit(), + applySha3(arguments.at(0), arguments.at(1), _item.location()) + ); + else + { + bool invMem = SemanticInformation::invalidatesMemory(_item.instruction()); + bool invStor = SemanticInformation::invalidatesStorage(_item.instruction()); + // We could be a bit more fine-grained here (CALL only invalidates part of + // memory, etc), but we do not for now. + if (invMem) + resetMemory(); + if (invStor) + resetStorage(); + if (invMem || invStor) + m_sequenceNumber += 2; // Increment by two because it can read and write + assertThrow(info.ret <= 1, InvalidDeposit, ""); + if (info.ret == 1) + setStackElement( + m_stackHeight + _item.deposit(), + m_expressionClasses->find(_item, arguments, _copyItem) + ); + } + } + m_stackElements.erase( + m_stackElements.upper_bound(m_stackHeight + _item.deposit()), + m_stackElements.end() + ); + m_stackHeight += _item.deposit(); + } + return op; +} + +/// Helper function for KnownState::reduceToCommonKnowledge, removes everything from +/// _this which is not in or not equal to the value in _other. +template <class _Mapping> void intersect(_Mapping& _this, _Mapping const& _other) +{ + for (auto it = _this.begin(); it != _this.end();) + if (_other.count(it->first) && _other.at(it->first) == it->second) + ++it; + else + it = _this.erase(it); +} + +void KnownState::reduceToCommonKnowledge(KnownState const& _other, bool _combineSequenceNumbers) +{ + int stackDiff = m_stackHeight - _other.m_stackHeight; + for (auto it = m_stackElements.begin(); it != m_stackElements.end();) + if (_other.m_stackElements.count(it->first - stackDiff)) + { + Id other = _other.m_stackElements.at(it->first - stackDiff); + if (it->second == other) + ++it; + else + { + set<u256> theseTags = tagsInExpression(it->second); + set<u256> otherTags = tagsInExpression(other); + if (!theseTags.empty() && !otherTags.empty()) + { + theseTags.insert(otherTags.begin(), otherTags.end()); + it->second = tagUnion(theseTags); + ++it; + } + else + it = m_stackElements.erase(it); + } + } + else + it = m_stackElements.erase(it); + + // Use the smaller stack height. Essential to terminate in case of loops. + if (m_stackHeight > _other.m_stackHeight) + { + map<int, Id> shiftedStack; + for (auto const& stackElement: m_stackElements) + shiftedStack[stackElement.first - stackDiff] = stackElement.second; + m_stackElements = move(shiftedStack); + m_stackHeight = _other.m_stackHeight; + } + + intersect(m_storageContent, _other.m_storageContent); + intersect(m_memoryContent, _other.m_memoryContent); + if (_combineSequenceNumbers) + m_sequenceNumber = max(m_sequenceNumber, _other.m_sequenceNumber); +} + +bool KnownState::operator==(KnownState const& _other) const +{ + if (m_storageContent != _other.m_storageContent || m_memoryContent != _other.m_memoryContent) + return false; + int stackDiff = m_stackHeight - _other.m_stackHeight; + auto thisIt = m_stackElements.cbegin(); + auto otherIt = _other.m_stackElements.cbegin(); + for (; thisIt != m_stackElements.cend() && otherIt != _other.m_stackElements.cend(); ++thisIt, ++otherIt) + if (thisIt->first - stackDiff != otherIt->first || thisIt->second != otherIt->second) + return false; + return (thisIt == m_stackElements.cend() && otherIt == _other.m_stackElements.cend()); +} + +ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location) +{ + if (m_stackElements.count(_stackHeight)) + return m_stackElements.at(_stackHeight); + // Stack element not found (not assigned yet), create new unknown equivalence class. + return m_stackElements[_stackHeight] = + m_expressionClasses->find(AssemblyItem(UndefinedItem, _stackHeight, _location)); +} + +KnownState::Id KnownState::relativeStackElement(int _stackOffset, SourceLocation const& _location) +{ + return stackElement(m_stackHeight + _stackOffset, _location); +} + +void KnownState::clearTagUnions() +{ + for (auto it = m_stackElements.begin(); it != m_stackElements.end();) + if (m_tagUnions.left.count(it->second)) + it = m_stackElements.erase(it); + else + ++it; +} + +void KnownState::setStackElement(int _stackHeight, Id _class) +{ + m_stackElements[_stackHeight] = _class; +} + +void KnownState::swapStackElements( + int _stackHeightA, + int _stackHeightB, + SourceLocation const& _location +) +{ + assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements."); + // ensure they are created + stackElement(_stackHeightA, _location); + stackElement(_stackHeightB, _location); + + swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]); +} + +KnownState::StoreOperation KnownState::storeInStorage( + Id _slot, + Id _value, + SourceLocation const& _location) +{ + if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value) + // do not execute the storage if we know that the value is already there + return StoreOperation(); + m_sequenceNumber++; + decltype(m_storageContent) storageContents; + // Copy over all values (i.e. retain knowledge about them) where we know that this store + // operation will not destroy the knowledge. Specifically, we copy storage locations we know + // are different from _slot or locations where we know that the stored value is equal to _value. + for (auto const& storageItem: m_storageContent) + if (m_expressionClasses->knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value) + storageContents.insert(storageItem); + m_storageContent = move(storageContents); + + AssemblyItem item(Instruction::SSTORE, _location); + Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber); + StoreOperation operation(StoreOperation::Storage, _slot, m_sequenceNumber, id); + m_storageContent[_slot] = _value; + // increment a second time so that we get unique sequence numbers for writes + m_sequenceNumber++; + + return operation; +} + +ExpressionClasses::Id KnownState::loadFromStorage(Id _slot, SourceLocation const& _location) +{ + if (m_storageContent.count(_slot)) + return m_storageContent.at(_slot); + + AssemblyItem item(Instruction::SLOAD, _location); + return m_storageContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber); +} + +KnownState::StoreOperation KnownState::storeInMemory(Id _slot, Id _value, SourceLocation const& _location) +{ + if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value) + // do not execute the store if we know that the value is already there + return StoreOperation(); + m_sequenceNumber++; + decltype(m_memoryContent) memoryContents; + // copy over values at points where we know that they are different from _slot by at least 32 + for (auto const& memoryItem: m_memoryContent) + if (m_expressionClasses->knownToBeDifferentBy32(memoryItem.first, _slot)) + memoryContents.insert(memoryItem); + m_memoryContent = move(memoryContents); + + AssemblyItem item(Instruction::MSTORE, _location); + Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber); + StoreOperation operation(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id)); + m_memoryContent[_slot] = _value; + // increment a second time so that we get unique sequence numbers for writes + m_sequenceNumber++; + return operation; +} + +ExpressionClasses::Id KnownState::loadFromMemory(Id _slot, SourceLocation const& _location) +{ + if (m_memoryContent.count(_slot)) + return m_memoryContent.at(_slot); + + AssemblyItem item(Instruction::MLOAD, _location); + return m_memoryContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber); +} + +KnownState::Id KnownState::applySha3( + Id _start, + Id _length, + SourceLocation const& _location +) +{ + AssemblyItem sha3Item(Instruction::SHA3, _location); + // Special logic if length is a short constant, otherwise we cannot tell. + u256 const* l = m_expressionClasses->knownConstant(_length); + // unknown or too large length + if (!l || *l > 128) + return m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber); + + vector<Id> arguments; + for (u256 i = 0; i < *l; i += 32) + { + Id slot = m_expressionClasses->find( + AssemblyItem(Instruction::ADD, _location), + {_start, m_expressionClasses->find(i)} + ); + arguments.push_back(loadFromMemory(slot, _location)); + } + if (m_knownSha3Hashes.count(arguments)) + return m_knownSha3Hashes.at(arguments); + Id v; + // If all arguments are known constants, compute the sha3 here + if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses->knownConstant(_a); })) + { + bytes data; + for (Id a: arguments) + data += toBigEndian(*m_expressionClasses->knownConstant(a)); + data.resize(size_t(*l)); + v = m_expressionClasses->find(AssemblyItem(u256(sha3(data)), _location)); + } + else + v = m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber); + return m_knownSha3Hashes[arguments] = v; +} + +set<u256> KnownState::tagsInExpression(KnownState::Id _expressionId) +{ + if (m_tagUnions.left.count(_expressionId)) + return m_tagUnions.left.at(_expressionId); + // Might be a tag, then return the set of itself. + ExpressionClasses::Expression expr = m_expressionClasses->representative(_expressionId); + if (expr.item && expr.item->type() == PushTag) + return set<u256>({expr.item->data()}); + else + return set<u256>(); +} + +KnownState::Id KnownState::tagUnion(set<u256> _tags) +{ + if (m_tagUnions.right.count(_tags)) + return m_tagUnions.right.at(_tags); + else + { + Id id = m_expressionClasses->newClass(SourceLocation()); + m_tagUnions.right.insert(make_pair(_tags, id)); + return id; + } +} + diff --git a/libevmasm/KnownState.h b/libevmasm/KnownState.h new file mode 100644 index 00000000..c1c602dc --- /dev/null +++ b/libevmasm/KnownState.h @@ -0,0 +1,179 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file KnownState.h + * @author Christian <c@ethdev.com> + * @date 2015 + * Contains knowledge about the state of the virtual machine at a specific instruction. + */ + +#pragma once + +#include <vector> +#include <map> +#include <set> +#include <tuple> +#include <memory> +#include <ostream> +#pragma warning(push) +#pragma GCC diagnostic push +#pragma clang diagnostic ignored "-Wredeclared-class-member" +#include <boost/bimap.hpp> +#pragma warning(pop) +#pragma GCC diagnostic pop +#include <libdevcore/CommonIO.h> +#include <libdevcore/Exceptions.h> +#include <libevmasm/ExpressionClasses.h> +#include <libevmasm/SemanticInformation.h> + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector<AssemblyItem>; + +/** + * Class to infer and store knowledge about the state of the virtual machine at a specific + * instruction. + * + * The general workings are that for each assembly item that is fed, an equivalence class is + * derived from the operation and the equivalence class of its arguments. DUPi, SWAPi and some + * arithmetic instructions are used to infer equivalences while these classes are determined. + */ +class KnownState +{ +public: + using Id = ExpressionClasses::Id; + struct StoreOperation + { + enum Target { Invalid, Memory, Storage }; + StoreOperation(): target(Invalid), sequenceNumber(-1) {} + StoreOperation( + Target _target, + Id _slot, + unsigned _sequenceNumber, + Id _expression + ): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {} + bool isValid() const { return target != Invalid; } + Target target; + Id slot; + unsigned sequenceNumber; + Id expression; + }; + + explicit KnownState( + std::shared_ptr<ExpressionClasses> _expressionClasses = std::make_shared<ExpressionClasses>() + ): m_expressionClasses(_expressionClasses) + { + } + + /// Streams debugging information to @a _out. + std::ostream& stream(std::ostream& _out) const; + + /// Feeds the item into the system for analysis. + /// @returns a possible store operation + StoreOperation feedItem(AssemblyItem const& _item, bool _copyItem = false); + + /// Resets any knowledge about storage. + void resetStorage() { m_storageContent.clear(); } + /// Resets any knowledge about storage. + void resetMemory() { m_memoryContent.clear(); } + /// Resets any knowledge about the current stack. + void resetStack() { m_stackElements.clear(); m_stackHeight = 0; } + /// Resets any knowledge. + void reset() { resetStorage(); resetMemory(); resetStack(); } + + unsigned sequenceNumber() const { return m_sequenceNumber; } + + /// Replaces the state by the intersection with _other, i.e. only equal knowledge is retained. + /// If the stack heighht is different, the smaller one is used and the stack is compared + /// relatively. + /// @param _combineSequenceNumbers if true, sets the sequence number to the maximum of both + void reduceToCommonKnowledge(KnownState const& _other, bool _combineSequenceNumbers); + + /// @returns a shared pointer to a copy of this state. + std::shared_ptr<KnownState> copy() const { return std::make_shared<KnownState>(*this); } + + /// @returns true if the knowledge about the state of both objects is (known to be) equal. + bool operator==(KnownState const& _other) const; + + /// Retrieves the current equivalence class fo the given stack element (or generates a new + /// one if it does not exist yet). + Id stackElement(int _stackHeight, SourceLocation const& _location); + /// @returns the stackElement relative to the current stack height. + Id relativeStackElement(int _stackOffset, SourceLocation const& _location = SourceLocation()); + + /// @returns its set of tags if the given expression class is a known tag union; returns a set + /// containing the tag if it is a PushTag expression and the empty set otherwise. + std::set<u256> tagsInExpression(Id _expressionId); + /// During analysis, different tags on the stack are partially treated as the same class. + /// This removes such classes not to confuse later analyzers. + void clearTagUnions(); + + int stackHeight() const { return m_stackHeight; } + std::map<int, Id> const& stackElements() const { return m_stackElements; } + ExpressionClasses& expressionClasses() const { return *m_expressionClasses; } + + std::map<Id, Id> const& storageContent() const { return m_storageContent; } + +private: + /// Assigns a new equivalence class to the next sequence number of the given stack element. + void setStackElement(int _stackHeight, Id _class); + /// Swaps the given stack elements in their next sequence number. + void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location); + + /// Increments the sequence number, deletes all storage information that might be overwritten + /// and stores the new value at the given slot. + /// @returns the store operation, which might be invalid if storage was not modified + StoreOperation storeInStorage(Id _slot, Id _value, SourceLocation const& _location); + /// Retrieves the current value at the given slot in storage or creates a new special sload class. + Id loadFromStorage(Id _slot, SourceLocation const& _location); + /// Increments the sequence number, deletes all memory information that might be overwritten + /// and stores the new value at the given slot. + /// @returns the store operation, which might be invalid if memory was not modified + StoreOperation storeInMemory(Id _slot, Id _value, SourceLocation const& _location); + /// Retrieves the current value at the given slot in memory or creates a new special mload class. + Id loadFromMemory(Id _slot, SourceLocation const& _location); + /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory. + Id applySha3(Id _start, Id _length, SourceLocation const& _location); + + /// @returns a new or already used Id representing the given set of tags. + Id tagUnion(std::set<u256> _tags); + + /// Current stack height, can be negative. + int m_stackHeight = 0; + /// Current stack layout, mapping stack height -> equivalence class + std::map<int, Id> m_stackElements; + /// Current sequence number, this is incremented with each modification to storage or memory. + unsigned m_sequenceNumber = 1; + /// Knowledge about storage content. + std::map<Id, Id> m_storageContent; + /// Knowledge about memory content. Keys are memory addresses, note that the values overlap + /// and are not contained here if they are not completely known. + std::map<Id, Id> m_memoryContent; + /// Keeps record of all sha3 hashes that are computed. + std::map<std::vector<Id>, Id> m_knownSha3Hashes; + /// Structure containing the classes of equivalent expressions. + std::shared_ptr<ExpressionClasses> m_expressionClasses; + /// Container for unions of tags stored on the stack. + boost::bimap<Id, std::set<u256>> m_tagUnions; +}; + +} +} diff --git a/libevmasm/LinkerObject.cpp b/libevmasm/LinkerObject.cpp new file mode 100644 index 00000000..ceb864a1 --- /dev/null +++ b/libevmasm/LinkerObject.cpp @@ -0,0 +1,62 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file LinkerObject.cpp + * @author Christian R <c@ethdev.com> + * @date 2015 + */ + +#include <libevmasm/LinkerObject.h> +#include <libdevcore/CommonData.h> + +using namespace dev; +using namespace dev::eth; +using namespace std; + +void LinkerObject::append(LinkerObject const& _other) +{ + for (auto const& ref: _other.linkReferences) + linkReferences[ref.first + bytecode.size()] = ref.second; + bytecode += _other.bytecode; +} + +void LinkerObject::link(map<string, h160> const& _libraryAddresses) +{ + std::map<size_t, std::string> remainingRefs; + for (auto const& linkRef: linkReferences) + { + auto it = _libraryAddresses.find(linkRef.second); + if (it == _libraryAddresses.end()) + remainingRefs.insert(linkRef); + else + it->second.ref().copyTo(ref(bytecode).cropped(linkRef.first, 20)); + } + linkReferences.swap(remainingRefs); +} + +string LinkerObject::toHex() const +{ + string hex = dev::toHex(bytecode); + for (auto const& ref: linkReferences) + { + size_t pos = ref.first * 2; + string const& name = ref.second; + hex[pos] = hex[pos + 1] = hex[pos + 38] = hex[pos + 39] = '_'; + for (size_t i = 0; i < 36; ++i) + hex[pos + 2 + i] = i < name.size() ? name[i] : '_'; + } + return hex; +} diff --git a/libevmasm/LinkerObject.h b/libevmasm/LinkerObject.h new file mode 100644 index 00000000..83d2bd7e --- /dev/null +++ b/libevmasm/LinkerObject.h @@ -0,0 +1,55 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Assembly.h + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#pragma once + +#include <libdevcore/Common.h> +#include <libdevcore/FixedHash.h> + +namespace dev +{ +namespace eth +{ + +/** + * Binary object that potentially still needs to be linked (i.e. addresses of other contracts + * need to be filled in). + */ +struct LinkerObject +{ + bytes bytecode; + /// Map from offsets in bytecode to library identifiers. The addresses starting at those offsets + /// need to be replaced by the actual addresses by the linker. + std::map<size_t, std::string> linkReferences; + + /// Appends the bytecode of @a _other and incorporates its link references. + void append(LinkerObject const& _other); + + /// Links the given libraries by replacing their uses in the code and removes them from the references. + void link(std::map<std::string, h160> const& _libraryAddresses); + + /// @returns a hex representation of the bytecode of the given object, replacing unlinked + /// addresses by placeholders. + std::string toHex() const; +}; + +} +} diff --git a/libevmasm/PathGasMeter.cpp b/libevmasm/PathGasMeter.cpp new file mode 100644 index 00000000..8f7314f8 --- /dev/null +++ b/libevmasm/PathGasMeter.cpp @@ -0,0 +1,128 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file PathGasMeter.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + */ + +#include "PathGasMeter.h" +#include <libevmasm/KnownState.h> +#include <libevmasm/SemanticInformation.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + +PathGasMeter::PathGasMeter(AssemblyItems const& _items): + m_items(_items) +{ + for (size_t i = 0; i < m_items.size(); ++i) + if (m_items[i].type() == Tag) + m_tagPositions[m_items[i].data()] = i; +} + +GasMeter::GasConsumption PathGasMeter::estimateMax( + size_t _startIndex, + shared_ptr<KnownState> const& _state +) +{ + auto path = unique_ptr<GasPath>(new GasPath()); + path->index = _startIndex; + path->state = _state->copy(); + m_queue.push_back(move(path)); + + GasMeter::GasConsumption gas; + while (!m_queue.empty() && !gas.isInfinite) + gas = max(gas, handleQueueItem()); + return gas; +} + +GasMeter::GasConsumption PathGasMeter::handleQueueItem() +{ + assertThrow(!m_queue.empty(), OptimizerException, ""); + + unique_ptr<GasPath> path = move(m_queue.back()); + m_queue.pop_back(); + + shared_ptr<KnownState> state = path->state; + GasMeter meter(state, path->largestMemoryAccess); + ExpressionClasses& classes = state->expressionClasses(); + GasMeter::GasConsumption gas = path->gas; + size_t index = path->index; + + if (index >= m_items.size() || (index > 0 && m_items.at(index).type() != Tag)) + // Invalid jump usually provokes an out-of-gas exception, but we want to give an upper + // bound on the gas that is needed without changing the behaviour, so it is fine to + // return the current gas value. + return gas; + + set<u256> jumpTags; + for (; index < m_items.size() && !gas.isInfinite; ++index) + { + bool branchStops = false; + jumpTags.clear(); + AssemblyItem const& item = m_items.at(index); + if (item.type() == Tag || item == AssemblyItem(eth::Instruction::JUMPDEST)) + { + // Do not allow any backwards jump. This is quite restrictive but should work for + // the simplest things. + if (path->visitedJumpdests.count(index)) + return GasMeter::GasConsumption::infinite(); + path->visitedJumpdests.insert(index); + } + else if (item == AssemblyItem(eth::Instruction::JUMP)) + { + branchStops = true; + jumpTags = state->tagsInExpression(state->relativeStackElement(0)); + if (jumpTags.empty()) // unknown jump destination + return GasMeter::GasConsumption::infinite(); + } + else if (item == AssemblyItem(eth::Instruction::JUMPI)) + { + ExpressionClasses::Id condition = state->relativeStackElement(-1); + if (classes.knownNonZero(condition) || !classes.knownZero(condition)) + { + jumpTags = state->tagsInExpression(state->relativeStackElement(0)); + if (jumpTags.empty()) // unknown jump destination + return GasMeter::GasConsumption::infinite(); + } + branchStops = classes.knownNonZero(condition); + } + else if (SemanticInformation::altersControlFlow(item)) + branchStops = true; + + gas += meter.estimateMax(item); + + for (u256 const& tag: jumpTags) + { + auto newPath = unique_ptr<GasPath>(new GasPath()); + newPath->index = m_items.size(); + if (m_tagPositions.count(tag)) + newPath->index = m_tagPositions.at(tag); + newPath->gas = gas; + newPath->largestMemoryAccess = meter.largestMemoryAccess(); + newPath->state = state->copy(); + newPath->visitedJumpdests = path->visitedJumpdests; + m_queue.push_back(move(newPath)); + } + + if (branchStops) + break; + } + + return gas; +} diff --git a/libevmasm/PathGasMeter.h b/libevmasm/PathGasMeter.h new file mode 100644 index 00000000..1ada460a --- /dev/null +++ b/libevmasm/PathGasMeter.h @@ -0,0 +1,66 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file PathGasMeter.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + */ + +#pragma once + +#include <set> +#include <vector> +#include <memory> +#include <libevmasm/GasMeter.h> + +namespace dev +{ +namespace eth +{ + +class KnownState; + +struct GasPath +{ + size_t index = 0; + std::shared_ptr<KnownState> state; + u256 largestMemoryAccess; + GasMeter::GasConsumption gas; + std::set<size_t> visitedJumpdests; +}; + +/** + * Computes an upper bound on the gas usage of a computation starting at a certain position in + * a list of AssemblyItems in a given state until the computation stops. + * Can be used to estimate the gas usage of functions on any given input. + */ +class PathGasMeter +{ +public: + PathGasMeter(AssemblyItems const& _items); + + GasMeter::GasConsumption estimateMax(size_t _startIndex, std::shared_ptr<KnownState> const& _state); + +private: + GasMeter::GasConsumption handleQueueItem(); + + std::vector<std::unique_ptr<GasPath>> m_queue; + std::map<u256, size_t> m_tagPositions; + AssemblyItems const& m_items; +}; + +} +} diff --git a/libevmasm/SemanticInformation.cpp b/libevmasm/SemanticInformation.cpp new file mode 100644 index 00000000..ea579b83 --- /dev/null +++ b/libevmasm/SemanticInformation.cpp @@ -0,0 +1,181 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file SemanticInformation.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + * Helper to provide semantic information about assembly items. + */ + +#include <libevmasm/SemanticInformation.h> +#include <libevmasm/AssemblyItem.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + +bool SemanticInformation::breaksCSEAnalysisBlock(AssemblyItem const& _item) +{ + switch (_item.type()) + { + default: + case UndefinedItem: + case Tag: + return true; + case Push: + case PushString: + case PushTag: + case PushSub: + case PushSubSize: + case PushProgramSize: + case PushData: + case PushLibraryAddress: + return false; + case Operation: + { + if (isSwapInstruction(_item) || isDupInstruction(_item)) + return false; + if (_item.instruction() == Instruction::GAS || _item.instruction() == Instruction::PC) + return true; // GAS and PC assume a specific order of opcodes + if (_item.instruction() == Instruction::MSIZE) + return true; // msize is modified already by memory access, avoid that for now + InstructionInfo info = instructionInfo(_item.instruction()); + if (_item.instruction() == Instruction::SSTORE) + return false; + if (_item.instruction() == Instruction::MSTORE) + return false; + //@todo: We do not handle the following memory instructions for now: + // calldatacopy, codecopy, extcodecopy, mstore8, + // msize (note that msize also depends on memory read access) + + // the second requirement will be lifted once it is implemented + return info.sideEffects || info.args > 2; + } + } +} + +bool SemanticInformation::isCommutativeOperation(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return false; + switch (_item.instruction()) + { + case Instruction::ADD: + case Instruction::MUL: + case Instruction::EQ: + case Instruction::AND: + case Instruction::OR: + case Instruction::XOR: + return true; + default: + return false; + } +} + +bool SemanticInformation::isDupInstruction(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return false; + return Instruction::DUP1 <= _item.instruction() && _item.instruction() <= Instruction::DUP16; +} + +bool SemanticInformation::isSwapInstruction(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return false; + return Instruction::SWAP1 <= _item.instruction() && _item.instruction() <= Instruction::SWAP16; +} + +bool SemanticInformation::isJumpInstruction(AssemblyItem const& _item) +{ + return _item == AssemblyItem(Instruction::JUMP) || _item == AssemblyItem(Instruction::JUMPI); +} + +bool SemanticInformation::altersControlFlow(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return false; + switch (_item.instruction()) + { + // note that CALL, CALLCODE and CREATE do not really alter the control flow, because we + // continue on the next instruction + case Instruction::JUMP: + case Instruction::JUMPI: + case Instruction::RETURN: + case Instruction::SUICIDE: + case Instruction::STOP: + return true; + default: + return false; + } +} + + +bool SemanticInformation::isDeterministic(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return true; + + switch (_item.instruction()) + { + case Instruction::CALL: + case Instruction::CALLCODE: + case Instruction::DELEGATECALL: + case Instruction::CREATE: + case Instruction::GAS: + case Instruction::PC: + case Instruction::MSIZE: // depends on previous writes and reads, not only on content + case Instruction::BALANCE: // depends on previous calls + case Instruction::EXTCODESIZE: + return false; + default: + return true; + } +} + +bool SemanticInformation::invalidatesMemory(Instruction _instruction) +{ + switch (_instruction) + { + case Instruction::CALLDATACOPY: + case Instruction::CODECOPY: + case Instruction::EXTCODECOPY: + case Instruction::MSTORE: + case Instruction::MSTORE8: + case Instruction::CALL: + case Instruction::CALLCODE: + case Instruction::DELEGATECALL: + return true; + default: + return false; + } +} + +bool SemanticInformation::invalidatesStorage(Instruction _instruction) +{ + switch (_instruction) + { + case Instruction::CALL: + case Instruction::CALLCODE: + case Instruction::DELEGATECALL: + case Instruction::CREATE: + case Instruction::SSTORE: + return true; + default: + return false; + } +} diff --git a/libevmasm/SemanticInformation.h b/libevmasm/SemanticInformation.h new file mode 100644 index 00000000..094f4591 --- /dev/null +++ b/libevmasm/SemanticInformation.h @@ -0,0 +1,59 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file SemanticInformation.h + * @author Christian <c@ethdev.com> + * @date 2015 + * Helper to provide semantic information about assembly items. + */ + +#pragma once + +#include <libevmcore/Instruction.h> + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; + +/** + * Helper functions to provide context-independent information about assembly items. + */ +struct SemanticInformation +{ + /// @returns true if the given items starts a new block for common subexpression analysis. + static bool breaksCSEAnalysisBlock(AssemblyItem const& _item); + /// @returns true if the item is a two-argument operation whose value does not depend on the + /// order of its arguments. + static bool isCommutativeOperation(AssemblyItem const& _item); + static bool isDupInstruction(AssemblyItem const& _item); + static bool isSwapInstruction(AssemblyItem const& _item); + static bool isJumpInstruction(AssemblyItem const& _item); + static bool altersControlFlow(AssemblyItem const& _item); + /// @returns false if the value put on the stack by _item depends on anything else than + /// the information in the current block header, memory, storage or stack. + static bool isDeterministic(AssemblyItem const& _item); + /// @returns true if the given instruction modifies memory. + static bool invalidatesMemory(Instruction _instruction); + /// @returns true if the given instruction modifies storage (even indirectly). + static bool invalidatesStorage(Instruction _instruction); +}; + +} +} diff --git a/libevmasm/SourceLocation.h b/libevmasm/SourceLocation.h new file mode 100644 index 00000000..b8b57b60 --- /dev/null +++ b/libevmasm/SourceLocation.h @@ -0,0 +1,102 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @author Lefteris Karapetsas <lefteris@ethdev.com> + * @date 2015 + * Represents a location in a source file + */ + +#pragma once + +#include <memory> +#include <string> +#include <ostream> +#include <tuple> + +namespace dev +{ + +/** + * Representation of an interval of source positions. + * The interval includes start and excludes end. + */ +struct SourceLocation +{ + SourceLocation(int _start, int _end, std::shared_ptr<std::string const> _sourceName): + start(_start), end(_end), sourceName(_sourceName) { } + SourceLocation(): start(-1), end(-1) { } + + SourceLocation(SourceLocation const& _other): + start(_other.start), + end(_other.end), + sourceName(_other.sourceName) + {} + + SourceLocation& operator=(SourceLocation const& _other) + { + if (&_other == this) + return *this; + + start = _other.start; + end = _other.end; + sourceName = _other.sourceName; + return *this; + } + + bool operator==(SourceLocation const& _other) const { return start == _other.start && end == _other.end;} + bool operator!=(SourceLocation const& _other) const { return !operator==(_other); } + inline bool operator<(SourceLocation const& _other) const; + inline bool contains(SourceLocation const& _other) const; + inline bool intersects(SourceLocation const& _other) const; + + bool isEmpty() const { return start == -1 && end == -1; } + + int start; + int end; + std::shared_ptr<std::string const> sourceName; +}; + +/// Stream output for Location (used e.g. in boost exceptions). +inline std::ostream& operator<<(std::ostream& _out, SourceLocation const& _location) +{ + if (_location.isEmpty()) + return _out << "NO_LOCATION_SPECIFIED"; + return _out << *_location.sourceName << "[" << _location.start << "," << _location.end << ")"; +} + +bool SourceLocation::operator<(SourceLocation const& _other) const +{ + if (!sourceName || !_other.sourceName) + return int(!!sourceName) < int(!!_other.sourceName); + return make_tuple(*sourceName, start, end) < make_tuple(*_other.sourceName, _other.start, _other.end); +} + +bool SourceLocation::contains(SourceLocation const& _other) const +{ + if (isEmpty() || _other.isEmpty() || !sourceName || !_other.sourceName || *sourceName != *_other.sourceName) + return false; + return start <= _other.start && _other.end <= end; +} + +bool SourceLocation::intersects(SourceLocation const& _other) const +{ + if (isEmpty() || _other.isEmpty() || !sourceName || !_other.sourceName || *sourceName != *_other.sourceName) + return false; + return _other.start < end && start < _other.end; +} + +} diff --git a/libevmasm/Version.cpp b/libevmasm/Version.cpp new file mode 100644 index 00000000..c1379848 --- /dev/null +++ b/libevmasm/Version.cpp @@ -0,0 +1,38 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @author Christian <c@ethdev.com> + * @date 2015 + * Versioning. + */ + +#include <string> +#include <ethereum/BuildInfo.h> +#include <libdevcore/Common.h> +#include <libevmasm/Version.h> + +using namespace dev; +using namespace std; + +char const* dev::eth::VersionNumberLibEvmAsm = ETH_PROJECT_VERSION; +extern string const dev::eth::VersionStringLibEvmAsm = + string(dev::eth::VersionNumberLibEvmAsm) + + "-" + + string(DEV_QUOTED(ETH_COMMIT_HASH)).substr(0, 8) + + (ETH_CLEAN_REPO ? "" : "*") + + "/" DEV_QUOTED(ETH_BUILD_TYPE) "-" DEV_QUOTED(ETH_BUILD_PLATFORM); + diff --git a/libevmasm/Version.h b/libevmasm/Version.h new file mode 100644 index 00000000..8cba6e83 --- /dev/null +++ b/libevmasm/Version.h @@ -0,0 +1,36 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @author Christian <c@ethdev.com> + * @date 2015 + * Versioning. + */ + +#pragma once + +#include <string> + +namespace dev +{ +namespace eth +{ + +extern char const* VersionNumberLibEvmAsm; +extern std::string const VersionStringLibEvmAsm; + +} +} diff --git a/liblll/All.h b/liblll/All.h new file mode 100644 index 00000000..7c4192f6 --- /dev/null +++ b/liblll/All.h @@ -0,0 +1,6 @@ +#pragma once + +#include "CodeFragment.h" +#include "Compiler.h" +#include "CompilerState.h" +#include "Parser.h" diff --git a/liblll/CMakeLists.txt b/liblll/CMakeLists.txt new file mode 100644 index 00000000..97a1a186 --- /dev/null +++ b/liblll/CMakeLists.txt @@ -0,0 +1,23 @@ +cmake_policy(SET CMP0015 NEW) +set(CMAKE_AUTOMOC OFF) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATICLIB") + +aux_source_directory(. SRC_LIST) + +#include_directories(BEFORE ${JSONCPP_INCLUDE_DIRS}) +#include_directories(${Boost_INCLUDE_DIRS}) + +set(EXECUTABLE lll) + +file(GLOB HEADERS "*.h") + +include_directories(BEFORE ..) +add_library(${EXECUTABLE} ${SRC_LIST} ${HEADERS}) + +eth_use(${EXECUTABLE} REQUIRED Solidity::evmasm) +#target_link_libraries(${EXECUTABLE} evmasm) + +install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib ) +install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} ) + diff --git a/liblll/CodeFragment.cpp b/liblll/CodeFragment.cpp new file mode 100644 index 00000000..64680d5a --- /dev/null +++ b/liblll/CodeFragment.cpp @@ -0,0 +1,586 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file CodeFragment.cpp + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#include "CodeFragment.h" + +#include <boost/algorithm/string.hpp> +#pragma warning(push) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#include <boost/spirit/include/support_utree.hpp> +#pragma warning(pop) +#pragma GCC diagnostic pop +#include <libdevcore/Log.h> +#include <libdevcore/CommonIO.h> +#include <libevmcore/Instruction.h> +#include "CompilerState.h" +#include "Parser.h" +using namespace std; +using namespace dev; +using namespace dev::eth; +namespace qi = boost::spirit::qi; +namespace px = boost::phoenix; +namespace sp = boost::spirit; + +void CodeFragment::finalise(CompilerState const& _cs) +{ + if (_cs.usedAlloc && _cs.vars.size() && !m_finalised) + { + m_finalised = true; + m_asm.injectStart(Instruction::MSTORE8); + m_asm.injectStart((u256)((_cs.vars.size() + 2) * 32) - 1); + m_asm.injectStart((u256)1); + } +} + +CodeFragment::CodeFragment(sp::utree const& _t, CompilerState& _s, bool _allowASM) +{ +/* cdebug << "CodeFragment. Locals:"; + for (auto const& i: _s.defs) + cdebug << i.first << ":" << toHex(i.second.m_code); + cdebug << "Args:"; + for (auto const& i: _s.args) + cdebug << i.first << ":" << toHex(i.second.m_code); + cdebug << "Outers:"; + for (auto const& i: _s.outers) + cdebug << i.first << ":" << toHex(i.second.m_code); + debugOutAST(cout, _t); + cout << endl << flush; +*/ + switch (_t.which()) + { + case sp::utree_type::list_type: + constructOperation(_t, _s); + break; + case sp::utree_type::string_type: + { + auto sr = _t.get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::string_type>>(); + string s(sr.begin(), sr.end()); + m_asm.append(s); + break; + } + case sp::utree_type::symbol_type: + { + auto sr = _t.get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::symbol_type>>(); + string s(sr.begin(), sr.end()); + string us = boost::algorithm::to_upper_copy(s); + if (_allowASM && c_instructions.count(us)) + m_asm.append(c_instructions.at(us)); + else if (_s.defs.count(s)) + m_asm.append(_s.defs.at(s).m_asm); + else if (_s.args.count(s)) + m_asm.append(_s.args.at(s).m_asm); + else if (_s.outers.count(s)) + m_asm.append(_s.outers.at(s).m_asm); + else if (us.find_first_of("1234567890") != 0 && us.find_first_not_of("QWERTYUIOPASDFGHJKLZXCVBNM1234567890_") == string::npos) + { + auto it = _s.vars.find(s); + if (it == _s.vars.end()) + { + bool ok; + tie(it, ok) = _s.vars.insert(make_pair(s, make_pair(_s.stackSize, 32))); + _s.stackSize += 32; + } + m_asm.append((u256)it->second.first); + } + else + error<BareSymbol>(); + + break; + } + case sp::utree_type::any_type: + { + bigint i = *_t.get<bigint*>(); + if (i < 0 || i > bigint(u256(0) - 1)) + error<IntegerOutOfRange>(); + m_asm.append((u256)i); + break; + } + default: break; + } +} + +void CodeFragment::constructOperation(sp::utree const& _t, CompilerState& _s) +{ + if (_t.tag() == 0 && _t.empty()) + error<EmptyList>(); + else if (_t.tag() == 0 && _t.front().which() != sp::utree_type::symbol_type) + error<DataNotExecutable>(); + else + { + string s; + string us; + switch (_t.tag()) + { + case 0: + { + auto sr = _t.front().get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::symbol_type>>(); + s = string(sr.begin(), sr.end()); + us = boost::algorithm::to_upper_copy(s); + break; + } + case 1: + us = "MLOAD"; + break; + case 2: + us = "SLOAD"; + break; + case 3: + us = "MSTORE"; + break; + case 4: + us = "SSTORE"; + break; + case 5: + us = "SEQ"; + break; + case 6: + us = "CALLDATALOAD"; + break; + default:; + } + + auto firstAsString = [&]() + { + auto i = *++_t.begin(); + if (i.tag()) + error<InvalidName>(); + if (i.which() == sp::utree_type::string_type) + { + auto sr = i.get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::string_type>>(); + return string(sr.begin(), sr.end()); + } + else if (i.which() == sp::utree_type::symbol_type) + { + auto sr = i.get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::symbol_type>>(); + return _s.getDef(string(sr.begin(), sr.end())).m_asm.backString(); + } + return string(); + }; + + auto varAddress = [&](string const& n) + { + auto it = _s.vars.find(n); + if (it == _s.vars.end()) + { + bool ok; + tie(it, ok) = _s.vars.insert(make_pair(n, make_pair(_s.stackSize, 32))); + _s.stackSize += 32; + } + return it->second.first; + }; + + // Operations who args are not standard stack-pushers. + bool nonStandard = true; + if (us == "ASM") + { + int c = 0; + for (auto const& i: _t) + if (c++) + m_asm.append(CodeFragment(i, _s, true).m_asm); + } + else if (us == "INCLUDE") + { + if (_t.size() != 2) + error<IncorrectParameterCount>(); + m_asm.append(CodeFragment::compile(contentsString(firstAsString()), _s).m_asm); + } + else if (us == "SET") + { + if (_t.size() != 3) + error<IncorrectParameterCount>(); + int c = 0; + for (auto const& i: _t) + if (c++ == 2) + m_asm.append(CodeFragment(i, _s, false).m_asm); + m_asm.append((u256)varAddress(firstAsString())); + m_asm.append(Instruction::MSTORE); + } + else if (us == "GET") + { + if (_t.size() != 2) + error<IncorrectParameterCount>(); + m_asm.append((u256)varAddress(firstAsString())); + m_asm.append(Instruction::MLOAD); + } + else if (us == "REF") + m_asm.append((u256)varAddress(firstAsString())); + else if (us == "DEF") + { + string n; + unsigned ii = 0; + if (_t.size() != 3 && _t.size() != 4) + error<IncorrectParameterCount>(); + vector<string> args; + for (auto const& i: _t) + { + if (ii == 1) + { + if (i.tag()) + error<InvalidName>(); + if (i.which() == sp::utree_type::string_type) + { + auto sr = i.get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::string_type>>(); + n = string(sr.begin(), sr.end()); + } + else if (i.which() == sp::utree_type::symbol_type) + { + auto sr = i.get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::symbol_type>>(); + n = _s.getDef(string(sr.begin(), sr.end())).m_asm.backString(); + } + } + else if (ii == 2) + if (_t.size() == 3) + _s.defs[n] = CodeFragment(i, _s); + else + for (auto const& j: i) + { + if (j.tag() || j.which() != sp::utree_type::symbol_type) + error<InvalidMacroArgs>(); + auto sr = j.get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::symbol_type>>(); + args.push_back(string(sr.begin(), sr.end())); + } + else if (ii == 3) + { + auto k = make_pair(n, args.size()); + _s.macros[k].code = i; + _s.macros[k].env = _s.outers; + _s.macros[k].args = args; + for (auto const& i: _s.args) + _s.macros[k].env[i.first] = i.second; + for (auto const& i: _s.defs) + _s.macros[k].env[i.first] = i.second; + } + ++ii; + } + } + else if (us == "LIT") + { + if (_t.size() < 3) + error<IncorrectParameterCount>(); + unsigned ii = 0; + CodeFragment pos; + bytes data; + for (auto const& i: _t) + { + if (ii == 1) + { + pos = CodeFragment(i, _s); + if (pos.m_asm.deposit() != 1) + error<InvalidDeposit>(); + } + else if (ii == 2 && !i.tag() && i.which() == sp::utree_type::string_type) + { + auto sr = i.get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::string_type>>(); + data = bytes((byte const*)sr.begin(), (byte const*)sr.end()); + } + else if (ii >= 2 && !i.tag() && i.which() == sp::utree_type::any_type) + { + bigint bi = *i.get<bigint*>(); + if (bi < 0) + error<IntegerOutOfRange>(); + else if (bi > bigint(u256(0) - 1)) + { + if (ii == 2 && _t.size() == 3) + { + // One big int - allow it as hex. + data.resize(bytesRequired(bi)); + toBigEndian(bi, data); + } + else + error<IntegerOutOfRange>(); + } + else + { + data.resize(data.size() + 32); + *(h256*)(&data.back() - 31) = (u256)bi; + } + } + else if (ii) + error<InvalidLiteral>(); + ++ii; + } + m_asm.append((u256)data.size()); + m_asm.append(Instruction::DUP1); + m_asm.append(data); + m_asm.append(pos.m_asm, 1); + m_asm.append(Instruction::CODECOPY); + } + else + nonStandard = false; + + if (nonStandard) + return; + + std::map<std::string, Instruction> const c_arith = { { "+", Instruction::ADD }, { "-", Instruction::SUB }, { "*", Instruction::MUL }, { "/", Instruction::DIV }, { "%", Instruction::MOD }, { "&", Instruction::AND }, { "|", Instruction::OR }, { "^", Instruction::XOR } }; + std::map<std::string, pair<Instruction, bool>> const c_binary = { { "<", { Instruction::LT, false } }, { "<=", { Instruction::GT, true } }, { ">", { Instruction::GT, false } }, { ">=", { Instruction::LT, true } }, { "S<", { Instruction::SLT, false } }, { "S<=", { Instruction::SGT, true } }, { "S>", { Instruction::SGT, false } }, { "S>=", { Instruction::SLT, true } }, { "=", { Instruction::EQ, false } }, { "!=", { Instruction::EQ, true } } }; + std::map<std::string, Instruction> const c_unary = { { "!", Instruction::ISZERO } }; + + vector<CodeFragment> code; + CompilerState ns = _s; + ns.vars.clear(); + ns.usedAlloc = false; + int c = _t.tag() ? 1 : 0; + for (auto const& i: _t) + if (c++) + { + if (us == "LLL" && c == 1) + code.push_back(CodeFragment(i, ns)); + else + code.push_back(CodeFragment(i, _s)); + } + auto requireSize = [&](unsigned s) { if (code.size() != s) error<IncorrectParameterCount>(); }; + auto requireMinSize = [&](unsigned s) { if (code.size() < s) error<IncorrectParameterCount>(); }; + auto requireMaxSize = [&](unsigned s) { if (code.size() > s) error<IncorrectParameterCount>(); }; + auto requireDeposit = [&](unsigned i, int s) { if (code[i].m_asm.deposit() != s) error<InvalidDeposit>(); }; + + if (_s.macros.count(make_pair(s, code.size()))) + { + Macro const& m = _s.macros.at(make_pair(s, code.size())); + CompilerState cs = _s; + for (auto const& i: m.env) + cs.outers[i.first] = i.second; + for (auto const& i: cs.defs) + cs.outers[i.first] = i.second; + cs.defs.clear(); + for (unsigned i = 0; i < m.args.size(); ++i) + { + //requireDeposit(i, 1); + cs.args[m.args[i]] = code[i]; + } + m_asm.append(CodeFragment(m.code, cs).m_asm); + for (auto const& i: cs.defs) + _s.defs[i.first] = i.second; + for (auto const& i: cs.macros) + _s.macros.insert(i); + } + else if (c_instructions.count(us)) + { + auto it = c_instructions.find(us); + int ea = instructionInfo(it->second).args; + if (ea >= 0) + requireSize(ea); + else + requireMinSize(-ea); + + for (unsigned i = code.size(); i; --i) + m_asm.append(code[i - 1].m_asm, 1); + m_asm.append(it->second); + } + else if (c_arith.count(us)) + { + auto it = c_arith.find(us); + requireMinSize(1); + for (unsigned i = code.size(); i; --i) + { + requireDeposit(i - 1, 1); + m_asm.append(code[i - 1].m_asm, 1); + } + for (unsigned i = 1; i < code.size(); ++i) + m_asm.append(it->second); + } + else if (c_binary.count(us)) + { + auto it = c_binary.find(us); + requireSize(2); + requireDeposit(0, 1); + requireDeposit(1, 1); + m_asm.append(code[1].m_asm, 1); + m_asm.append(code[0].m_asm, 1); + m_asm.append(it->second.first); + if (it->second.second) + m_asm.append(Instruction::ISZERO); + } + else if (c_unary.count(us)) + { + auto it = c_unary.find(us); + requireSize(1); + requireDeposit(0, 1); + m_asm.append(code[0].m_asm, 1); + m_asm.append(it->second); + } + else if (us == "IF") + { + requireSize(3); + requireDeposit(0, 1); + int minDep = min(code[1].m_asm.deposit(), code[2].m_asm.deposit()); + + m_asm.append(code[0].m_asm); + auto pos = m_asm.appendJumpI(); + m_asm.onePath(); + m_asm.append(code[2].m_asm, minDep); + auto end = m_asm.appendJump(); + m_asm.otherPath(); + m_asm << pos.tag(); + m_asm.append(code[1].m_asm, minDep); + m_asm << end.tag(); + m_asm.donePaths(); + } + else if (us == "WHEN" || us == "UNLESS") + { + requireSize(2); + requireDeposit(0, 1); + + m_asm.append(code[0].m_asm); + if (us == "WHEN") + m_asm.append(Instruction::ISZERO); + auto end = m_asm.appendJumpI(); + m_asm.onePath(); + m_asm.otherPath(); + m_asm.append(code[1].m_asm, 0); + m_asm << end.tag(); + m_asm.donePaths(); + } + else if (us == "WHILE") + { + requireSize(2); + requireDeposit(0, 1); + + auto begin = m_asm.append(); + m_asm.append(code[0].m_asm); + m_asm.append(Instruction::ISZERO); + auto end = m_asm.appendJumpI(); + m_asm.append(code[1].m_asm, 0); + m_asm.appendJump(begin); + m_asm << end.tag(); + } + else if (us == "FOR") + { + requireSize(4); + requireDeposit(1, 1); + + m_asm.append(code[0].m_asm, 0); + auto begin = m_asm.append(); + m_asm.append(code[1].m_asm); + m_asm.append(Instruction::ISZERO); + auto end = m_asm.appendJumpI(); + m_asm.append(code[3].m_asm, 0); + m_asm.append(code[2].m_asm, 0); + m_asm.appendJump(begin); + m_asm << end.tag(); + } + else if (us == "ALLOC") + { + requireSize(1); + requireDeposit(0, 1); + + m_asm.append(Instruction::MSIZE); + m_asm.append(u256(0)); + m_asm.append(u256(1)); + m_asm.append(code[0].m_asm, 1); + m_asm.append(Instruction::MSIZE); + m_asm.append(Instruction::ADD); + m_asm.append(Instruction::SUB); + m_asm.append(Instruction::MSTORE8); + + _s.usedAlloc = true; + } + else if (us == "LLL") + { + requireMinSize(2); + requireMaxSize(3); + requireDeposit(1, 1); + + auto subPush = m_asm.appendSubSize(code[0].assembly(ns)); + m_asm.append(Instruction::DUP1); + if (code.size() == 3) + { + requireDeposit(2, 1); + m_asm.append(code[2].m_asm, 1); + m_asm.append(Instruction::LT); + m_asm.append(Instruction::ISZERO); + m_asm.append(Instruction::MUL); + m_asm.append(Instruction::DUP1); + } + m_asm.append(subPush); + m_asm.append(code[1].m_asm, 1); + m_asm.append(Instruction::CODECOPY); + } + else if (us == "&&" || us == "||") + { + requireMinSize(1); + for (unsigned i = 0; i < code.size(); ++i) + requireDeposit(i, 1); + + auto end = m_asm.newTag(); + if (code.size() > 1) + { + m_asm.append((u256)(us == "||" ? 1 : 0)); + for (unsigned i = 1; i < code.size(); ++i) + { + // Check if true - predicate + m_asm.append(code[i - 1].m_asm, 1); + if (us == "&&") + m_asm.append(Instruction::ISZERO); + m_asm.appendJumpI(end); + } + m_asm.append(Instruction::POP); + } + + // Check if true - predicate + m_asm.append(code.back().m_asm, 1); + + // At end now. + m_asm.append(end); + } + else if (us == "~") + { + requireSize(1); + requireDeposit(0, 1); + + m_asm.append(code[0].m_asm, 1); + m_asm.append((u256)1); + m_asm.append((u256)0); + m_asm.append(Instruction::SUB); + m_asm.append(Instruction::SUB); + } + else if (us == "SEQ") + { + unsigned ii = 0; + for (auto const& i: code) + if (++ii < code.size()) + m_asm.append(i.m_asm, 0); + else + m_asm.append(i.m_asm); + } + else if (us == "RAW") + { + for (auto const& i: code) + m_asm.append(i.m_asm); + m_asm.popTo(1); + } + else if (us.find_first_of("1234567890") != 0 && us.find_first_not_of("QWERTYUIOPASDFGHJKLZXCVBNM1234567890_") == string::npos) + m_asm.append((u256)varAddress(s)); + else + error<InvalidOperation>(); + } +} + +CodeFragment CodeFragment::compile(string const& _src, CompilerState& _s) +{ + CodeFragment ret; + sp::utree o; + parseTreeLLL(_src, o); + if (!o.empty()) + ret = CodeFragment(o, _s); + _s.treesToKill.push_back(o); + return ret; +} diff --git a/liblll/CodeFragment.h b/liblll/CodeFragment.h new file mode 100644 index 00000000..03f812b6 --- /dev/null +++ b/liblll/CodeFragment.h @@ -0,0 +1,64 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file CodeFragment.h + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#pragma once + +#include <libdevcore/Common.h> +#include <libevmcore/Instruction.h> +#include <libevmasm/Assembly.h> +#include "Exceptions.h" + +namespace boost { namespace spirit { class utree; } } +namespace sp = boost::spirit; + +namespace dev +{ +namespace eth +{ + +struct CompilerState; + +class CodeFragment +{ +public: + CodeFragment() {} + CodeFragment(sp::utree const& _t, CompilerState& _s, bool _allowASM = false); + + static CodeFragment compile(std::string const& _src, CompilerState& _s); + + /// Consolidates data and compiles code. + Assembly& assembly(CompilerState const& _cs) { finalise(_cs); return m_asm; } + +private: + void finalise(CompilerState const& _cs); + + template <class T> void error() const { BOOST_THROW_EXCEPTION(T() ); } + void constructOperation(sp::utree const& _t, CompilerState& _s); + + bool m_finalised = false; + Assembly m_asm; +}; + +static const CodeFragment NullCodeFragment; + +} +} + diff --git a/liblll/Compiler.cpp b/liblll/Compiler.cpp new file mode 100644 index 00000000..f1538789 --- /dev/null +++ b/liblll/Compiler.cpp @@ -0,0 +1,95 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Compiler.cpp + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#include "Compiler.h" +#include "Parser.h" +#include "CompilerState.h" +#include "CodeFragment.h" + +using namespace std; +using namespace dev; +using namespace dev::eth; + +bytes dev::eth::compileLLL(string const& _src, bool _opt, vector<string>* _errors) +{ + try + { + CompilerState cs; + cs.populateStandard(); + auto f = CodeFragment::compile(_src, cs); + bytes ret = f.assembly(cs).optimise(_opt).assemble().bytecode; + for (auto i: cs.treesToKill) + killBigints(i); + return ret; + } + catch (Exception const& _e) + { + if (_errors) + { + _errors->push_back("Parse error."); + _errors->push_back(diagnostic_information(_e)); + } + } + catch (std::exception) + { + if (_errors) + _errors->push_back("Parse error."); + } + return bytes(); +} + +std::string dev::eth::compileLLLToAsm(std::string const& _src, bool _opt, std::vector<std::string>* _errors) +{ + try + { + CompilerState cs; + cs.populateStandard(); + string ret = CodeFragment::compile(_src, cs).assembly(cs).optimise(_opt).out(); + for (auto i: cs.treesToKill) + killBigints(i); + return ret; + } + catch (Exception const& _e) + { + if (_errors) + _errors->push_back(diagnostic_information(_e)); + } + catch (std::exception) + { + if (_errors) + _errors->push_back("Parse error."); + } + return string(); +} + +string dev::eth::parseLLL(string const& _src) +{ + sp::utree o; + try + { + parseTreeLLL(_src, o); + } + catch (...) {} + ostringstream ret; + debugOutAST(ret, o); + killBigints(o); + return ret.str(); +} diff --git a/liblll/Compiler.h b/liblll/Compiler.h new file mode 100644 index 00000000..0fadd278 --- /dev/null +++ b/liblll/Compiler.h @@ -0,0 +1,38 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Compiler.h + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#pragma once + +#include <string> +#include <vector> +#include <libdevcore/Common.h> + +namespace dev +{ +namespace eth +{ + +std::string parseLLL(std::string const& _src); +std::string compileLLLToAsm(std::string const& _src, bool _opt = true, std::vector<std::string>* _errors = nullptr); +bytes compileLLL(std::string const& _src, bool _opt = true, std::vector<std::string>* _errors = nullptr); + +} +} diff --git a/liblll/CompilerState.cpp b/liblll/CompilerState.cpp new file mode 100644 index 00000000..63351bc4 --- /dev/null +++ b/liblll/CompilerState.cpp @@ -0,0 +1,82 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file CompilerState.cpp + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#include "CompilerState.h" +#include "CodeFragment.h" + +using namespace std; +using namespace dev; +using namespace dev::eth; + +CompilerState::CompilerState() +{ +} + +CodeFragment const& CompilerState::getDef(std::string const& _s) +{ + if (defs.count(_s)) + return defs.at(_s); + else if (args.count(_s)) + return args.at(_s); + else if (outers.count(_s)) + return outers.at(_s); + else + return NullCodeFragment; +} + +void CompilerState::populateStandard() +{ + static const string s = "{" + "(def 'gav 0x51ba59315b3a95761d0863b05ccc7a7f54703d99)" + "(def 'config 0x661005d2720d855f1d9976f88bb10c1a3398c77f)" + "(def 'allgas (- (gas) 21))" + "(def 'send (to value) (call allgas to value 0 0 0 0))" + "(def 'send (gaslimit to value) (call gaslimit to value 0 0 0 0))" + "(def 'msg (gaslimit to value data datasize outsize) { (set x outsize) (set y (alloc @32)) (call gaslimit to value data datasize @0 @32) @0 })" + "(def 'msg (gaslimit to value data datasize) { (call gaslimit to value data datasize 0 32) @0 })" + "(def 'msg (gaslimit to value data) { [0]:data (msg gaslimit to value 0 32) })" + "(def 'msg (to value data) { [0]:data (msg allgas to value 0 32) })" + "(def 'msg (to data) { [0]:data (msg allgas to 0 0 32) })" + "(def 'create (value code) { [0]:(msize) (create value @0 (lll code @0)) })" + "(def 'create (code) { [0]:(msize) (create 0 @0 (lll code @0)) })" + "(def 'sha3 (val) { [0]:val (sha3 0 32) })" + "(def 'sha3pair (a b) { [0]:a [32]:b (sha3 0 64) })" + "(def 'sha3trip (a b c) { [0]:a [32]:b [64]:c (sha3 0 96) })" + "(def 'return (val) { [0]:val (return 0 32) })" + "(def 'returnlll (code) (return 0 (lll code 0)) )" + "(def 'makeperm (name pos) { (def name (sload pos)) (def name (v) (sstore pos v)) } )" + "(def 'permcount 0)" + "(def 'perm (name) { (makeperm name permcount) (def 'permcount (+ permcount 1)) } )" + "(def 'namereg (msg config 0))" + "(def 'coinreg (msg config 1))" + "(def 'gavcoin (msg config 2))" + "(def 'sendgavcoin (to value) { [32]'send [64]:to [96]:value (call allgas gavcoin 0 32 96 0 0) })" + "(def 'regname (name) { [32]'register [64]name (call allgas namereg 0 32 64 0 0) })" + "(def 'regcoin (name) { [32]name (call allgas coinreg 0 32 32 0 0) })" + "(def 'regcoin (name denom) { [32]name [64]denom (call allgas coinreg 0 32 64 0 0) })" + "(def 'ecrecover (r s v hash) { [0] r [32] s [64] v [96] hash (msg allgas 1 0 0 128) })" + "(def 'sha256 (data datasize) (msg allgas 2 0 data datasize))" + "(def 'ripemd160 (data datasize) (msg allgas 3 0 data datasize))" + "(def 'sha256 (val) { [0]:val (sha256 0 32) })" + "(def 'ripemd160 (val) { [0]:val (ripemd160 0 32) })" + "}"; + CodeFragment::compile(s, *this); +} diff --git a/liblll/CompilerState.h b/liblll/CompilerState.h new file mode 100644 index 00000000..bfe56f92 --- /dev/null +++ b/liblll/CompilerState.h @@ -0,0 +1,57 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file CompilerState.h + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#pragma once + +#include <boost/spirit/include/support_utree.hpp> +#include "CodeFragment.h" + +namespace dev +{ +namespace eth +{ + +struct Macro +{ + std::vector<std::string> args; + boost::spirit::utree code; + std::map<std::string, CodeFragment> env; +}; + +struct CompilerState +{ + CompilerState(); + + CodeFragment const& getDef(std::string const& _s); + void populateStandard(); + + unsigned stackSize = 128; + std::map<std::string, std::pair<unsigned, unsigned>> vars; ///< maps name to stack offset & size. + std::map<std::string, CodeFragment> defs; + std::map<std::string, CodeFragment> args; + std::map<std::string, CodeFragment> outers; + std::map<std::pair<std::string, unsigned>, Macro> macros; + std::vector<boost::spirit::utree> treesToKill; + bool usedAlloc = false; +}; + +} +} diff --git a/liblll/Exceptions.h b/liblll/Exceptions.h new file mode 100644 index 00000000..1e9671b3 --- /dev/null +++ b/liblll/Exceptions.h @@ -0,0 +1,44 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Exceptions.h + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#pragma once + +#include <libdevcore/Exceptions.h> + +namespace dev +{ +namespace eth +{ + +/// Compile a Low-level Lisp-like Language program into EVM-code. +class CompilerException: public dev::Exception {}; +class InvalidOperation: public CompilerException {}; +class IntegerOutOfRange: public CompilerException {}; +class EmptyList: public CompilerException {}; +class DataNotExecutable: public CompilerException {}; +class IncorrectParameterCount: public CompilerException {}; +class InvalidName: public CompilerException {}; +class InvalidMacroArgs: public CompilerException {}; +class InvalidLiteral: public CompilerException {}; +class BareSymbol: public CompilerException {}; + +} +} diff --git a/liblll/Parser.cpp b/liblll/Parser.cpp new file mode 100644 index 00000000..df30f389 --- /dev/null +++ b/liblll/Parser.cpp @@ -0,0 +1,145 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Parser.cpp + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#include "Parser.h" + +#define BOOST_RESULT_OF_USE_DECLTYPE +#define BOOST_SPIRIT_USE_PHOENIX_V3 +#include <boost/spirit/include/qi.hpp> +#include <boost/spirit/include/phoenix.hpp> +#include <boost/spirit/include/support_utree.hpp> + +using namespace std; +using namespace dev; +using namespace dev::eth; +namespace qi = boost::spirit::qi; +namespace px = boost::phoenix; +namespace sp = boost::spirit; + +void dev::eth::killBigints(sp::utree const& _this) +{ + switch (_this.which()) + { + case sp::utree_type::list_type: for (auto const& i: _this) killBigints(i); break; + case sp::utree_type::any_type: delete _this.get<bigint*>(); break; + default:; + } +} + +void dev::eth::debugOutAST(ostream& _out, sp::utree const& _this) +{ + switch (_this.which()) + { + case sp::utree_type::list_type: + switch (_this.tag()) + { + case 0: _out << "( "; for (auto const& i: _this) { debugOutAST(_out, i); _out << " "; } _out << ")"; break; + case 1: _out << "@ "; debugOutAST(_out, _this.front()); break; + case 2: _out << "@@ "; debugOutAST(_out, _this.front()); break; + case 3: _out << "[ "; debugOutAST(_out, _this.front()); _out << " ] "; debugOutAST(_out, _this.back()); break; + case 4: _out << "[[ "; debugOutAST(_out, _this.front()); _out << " ]] "; debugOutAST(_out, _this.back()); break; + case 5: _out << "{ "; for (auto const& i: _this) { debugOutAST(_out, i); _out << " "; } _out << "}"; break; + case 6: _out << "$ "; debugOutAST(_out, _this.front()); break; + default:; + } + + break; + case sp::utree_type::int_type: _out << _this.get<int>(); break; + case sp::utree_type::string_type: _out << "\"" << _this.get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::string_type>>() << "\""; break; + case sp::utree_type::symbol_type: _out << _this.get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::symbol_type>>(); break; + case sp::utree_type::any_type: _out << *_this.get<bigint*>(); break; + default: _out << "nil"; + } +} + +namespace dev { namespace eth { +namespace parseTreeLLL_ { + +template<unsigned N> +struct tagNode +{ + void operator()(sp::utree& n, qi::rule<string::const_iterator, qi::ascii::space_type, sp::utree()>::context_type& c) const + { + (boost::fusion::at_c<0>(c.attributes) = n).tag(N); + } +}; + +}}} + +void dev::eth::parseTreeLLL(string const& _s, sp::utree& o_out) +{ + using qi::standard::space; + using qi::standard::space_type; + using dev::eth::parseTreeLLL_::tagNode; + using symbol_type = sp::basic_string<std::string, sp::utree_type::symbol_type>; + using it = string::const_iterator; + + static const u256 ether = u256(1000000000) * 1000000000; + static const u256 finney = u256(1000000000) * 1000000; + static const u256 szabo = u256(1000000000) * 1000; + + qi::rule<it, space_type, sp::utree()> element; + qi::rule<it, string()> str = '"' > qi::lexeme[+(~qi::char_(std::string("\"") + '\0'))] > '"'; + qi::rule<it, string()> strsh = '\'' > qi::lexeme[+(~qi::char_(std::string(" ;$@()[]{}:\n\t") + '\0'))]; + qi::rule<it, symbol_type()> symbol = qi::lexeme[+(~qi::char_(std::string(" $@[]{}:();\"\x01-\x1f\x7f") + '\0'))]; + qi::rule<it, string()> intstr = qi::lexeme[ qi::no_case["0x"][qi::_val = "0x"] >> *qi::char_("0-9a-fA-F")[qi::_val += qi::_1]] | qi::lexeme[+qi::char_("0-9")[qi::_val += qi::_1]]; + qi::rule<it, bigint()> integer = intstr; + qi::rule<it, bigint()> multiplier = qi::lit("wei")[qi::_val = 1] | qi::lit("szabo")[qi::_val = szabo] | qi::lit("finney")[qi::_val = finney] | qi::lit("ether")[qi::_val = ether]; + qi::rule<it, space_type, bigint()> quantity = integer[qi::_val = qi::_1] >> -multiplier[qi::_val *= qi::_1]; + qi::rule<it, space_type, sp::utree()> atom = quantity[qi::_val = px::construct<sp::any_ptr>(px::new_<bigint>(qi::_1))] | (str | strsh)[qi::_val = qi::_1] | symbol[qi::_val = qi::_1]; + qi::rule<it, space_type, sp::utree::list_type()> seq = '{' > *element > '}'; + qi::rule<it, space_type, sp::utree::list_type()> mload = '@' > element; + qi::rule<it, space_type, sp::utree::list_type()> sload = qi::lit("@@") > element; + qi::rule<it, space_type, sp::utree::list_type()> mstore = '[' > element > ']' > -qi::lit(":") > element; + qi::rule<it, space_type, sp::utree::list_type()> sstore = qi::lit("[[") > element > qi::lit("]]") > -qi::lit(":") > element; + qi::rule<it, space_type, sp::utree::list_type()> calldataload = qi::lit("$") > element; + qi::rule<it, space_type, sp::utree::list_type()> list = '(' > *element > ')'; + + qi::rule<it, space_type, sp::utree()> extra = sload[tagNode<2>()] | mload[tagNode<1>()] | sstore[tagNode<4>()] | mstore[tagNode<3>()] | seq[tagNode<5>()] | calldataload[tagNode<6>()]; + element = atom | list | extra; + + string s; + s.reserve(_s.size()); + bool incomment = false; + bool instring = false; + bool insstring = false; + for (auto i: _s) + { + if (i == ';' && !instring && !insstring) + incomment = true; + else if (i == '\n') + incomment = instring = insstring = false; + else if (i == '"' && !insstring) + instring = !instring; + else if (i == '\'') + insstring = true; + else if (i == ' ') + insstring = false; + if (!incomment) + s.push_back(i); + } + auto ret = s.cbegin(); + qi::phrase_parse(ret, s.cend(), element, space, qi::skip_flag::dont_postskip, o_out); + for (auto i = ret; i != s.cend(); ++i) + if (!isspace(*i)) + BOOST_THROW_EXCEPTION(std::exception()); +} + diff --git a/liblll/Parser.h b/liblll/Parser.h new file mode 100644 index 00000000..b21989f0 --- /dev/null +++ b/liblll/Parser.h @@ -0,0 +1,41 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Parser.h + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#pragma once + +#include <string> +#include <vector> +#include <libdevcore/Common.h> + +namespace boost { namespace spirit { class utree; } } +namespace sp = boost::spirit; + +namespace dev +{ +namespace eth +{ + +void killBigints(sp::utree const& _this); +void parseTreeLLL(std::string const& _s, sp::utree& o_out); +void debugOutAST(std::ostream& _out, sp::utree const& _this); + +} +} diff --git a/libsolidity/CMakeLists.txt b/libsolidity/CMakeLists.txt index fa2f4e9c..ab9ee1e2 100644 --- a/libsolidity/CMakeLists.txt +++ b/libsolidity/CMakeLists.txt @@ -14,7 +14,7 @@ file(GLOB HEADERS "*/*.h") include_directories(BEFORE ..) add_library(${EXECUTABLE} ${SRC_LIST} ${HEADERS}) -eth_use(${EXECUTABLE} REQUIRED Dev::devcore Eth::evmcore Eth::evmasm) +eth_use(${EXECUTABLE} REQUIRED Dev::devcore Eth::evmcore Solidity::evmasm) install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib ) install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} ) diff --git a/lllc/CMakeLists.txt b/lllc/CMakeLists.txt new file mode 100644 index 00000000..989b8cbf --- /dev/null +++ b/lllc/CMakeLists.txt @@ -0,0 +1,12 @@ +aux_source_directory(. SRC_LIST) + +set(EXECUTABLE lllc) + +file(GLOB HEADERS "*.h") +include_directories(BEFORE ..) +add_executable(${EXECUTABLE} ${SRC_LIST} ${HEADERS}) + +eth_use(${EXECUTABLE} REQUIRED Solidity::lll Dev::buildinfo) + +install( TARGETS ${EXECUTABLE} DESTINATION bin ) + diff --git a/lllc/main.cpp b/lllc/main.cpp new file mode 100644 index 00000000..5f3078c2 --- /dev/null +++ b/lllc/main.cpp @@ -0,0 +1,124 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file main.cpp + * @author Gav Wood <i@gavwood.com> + * @date 2014 + * Ethereum client. + */ + +#include <fstream> +#include <iostream> +#include <liblll/Compiler.h> +#include <libdevcore/CommonIO.h> +#include <libdevcore/CommonData.h> +#include <libevmcore/Instruction.h> +#include "ethereum/BuildInfo.h" +using namespace std; +using namespace dev; +using namespace dev::eth; + +void help() +{ + cout + << "Usage lllc [OPTIONS] <file>" << endl + << "Options:" << endl + << " -b,--binary Parse, compile and assemble; output byte code in binary." << endl + << " -x,--hex Parse, compile and assemble; output byte code in hex." << endl + << " -a,--assembly Only parse and compile; show assembly." << endl + << " -t,--parse-tree Only parse; show parse tree." << endl + << " -h,--help Show this help message and exit." << endl + << " -V,--version Show the version and exit." << endl; + exit(0); +} + +void version() +{ + cout << "LLLC, the Lovely Little Language Compiler " << dev::Version << endl; + cout << " By Gav Wood, (c) 2014." << endl; + cout << "Build: " << DEV_QUOTED(ETH_BUILD_PLATFORM) << "/" << DEV_QUOTED(ETH_BUILD_TYPE) << endl; + exit(0); +} + +enum Mode { Binary, Hex, Assembly, ParseTree, Disassemble }; + +int main(int argc, char** argv) +{ + unsigned optimise = 1; + string infile; + Mode mode = Hex; + + for (int i = 1; i < argc; ++i) + { + string arg = argv[i]; + if (arg == "-h" || arg == "--help") + help(); + else if (arg == "-b" || arg == "--binary") + mode = Binary; + else if (arg == "-x" || arg == "--hex") + mode = Hex; + else if (arg == "-a" || arg == "--assembly") + mode = Assembly; + else if (arg == "-t" || arg == "--parse-tree") + mode = ParseTree; + else if ((arg == "-o" || arg == "--optimise") && argc > i + 1) + optimise = atoi(argv[++i]); + else if (arg == "-d" || arg == "--disassemble") + mode = Disassemble; + else if (arg == "-V" || arg == "--version") + version(); + else + infile = argv[i]; + } + + string src; + if (infile.empty()) + { + string s; + while (!cin.eof()) + { + getline(cin, s); + src.append(s); + } + } + else + src = contentsString(infile); + + vector<string> errors; + if (src.empty()) + errors.push_back("Empty file."); + else if (mode == Disassemble) + { + cout << disassemble(fromHex(src)) << endl; + } + else if (mode == Binary || mode == Hex) + { + auto bs = compileLLL(src, optimise ? true : false, &errors); + if (mode == Hex) + cout << toHex(bs) << endl; + else if (mode == Binary) + cout.write((char const*)bs.data(), bs.size()); + } + else if (mode == ParseTree) + cout << parseLLL(src) << endl; + else if (mode == Assembly) + cout << compileLLLToAsm(src, optimise ? true : false, &errors) << endl; + for (auto const& i: errors) + cerr << i << endl; + if ( errors.size() ) + return 1; + return 0; +} |