aboutsummaryrefslogtreecommitdiffstats
path: root/libevmasm
diff options
context:
space:
mode:
Diffstat (limited to 'libevmasm')
-rw-r--r--libevmasm/Assembly.cpp2
-rw-r--r--libevmasm/AssemblyItem.cpp8
-rw-r--r--libevmasm/AssemblyItem.h45
-rw-r--r--libevmasm/CommonSubexpressionEliminator.cpp2
-rw-r--r--libevmasm/ConstantOptimiser.cpp78
-rw-r--r--libevmasm/ConstantOptimiser.h19
-rw-r--r--libevmasm/ExpressionClasses.cpp343
-rw-r--r--libevmasm/ExpressionClasses.h65
-rw-r--r--libevmasm/SimplificationRules.cpp370
-rw-r--r--libevmasm/SimplificationRules.h140
10 files changed, 632 insertions, 440 deletions
diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp
index f50a38a6..845abfd4 100644
--- a/libevmasm/Assembly.cpp
+++ b/libevmasm/Assembly.cpp
@@ -416,7 +416,7 @@ LinkerObject const& Assembly::assemble() const
switch (i.type())
{
case Operation:
- ret.bytecode.push_back((byte)i.data());
+ ret.bytecode.push_back((byte)i.instruction());
break;
case PushString:
{
diff --git a/libevmasm/AssemblyItem.cpp b/libevmasm/AssemblyItem.cpp
index b790e094..6c7d5425 100644
--- a/libevmasm/AssemblyItem.cpp
+++ b/libevmasm/AssemblyItem.cpp
@@ -29,19 +29,19 @@ using namespace dev::eth;
AssemblyItem AssemblyItem::toSubAssemblyTag(size_t _subId) const
{
- assertThrow(m_data < (u256(1) << 64), Exception, "Tag already has subassembly set.");
+ assertThrow(data() < (u256(1) << 64), Exception, "Tag already has subassembly set.");
assertThrow(m_type == PushTag || m_type == Tag, Exception, "");
AssemblyItem r = *this;
r.m_type = PushTag;
- r.setPushTagSubIdAndTag(_subId, size_t(m_data));
+ r.setPushTagSubIdAndTag(_subId, size_t(data()));
return r;
}
pair<size_t, size_t> AssemblyItem::splitForeignPushTag() const
{
assertThrow(m_type == PushTag || m_type == Tag, Exception, "");
- return make_pair(size_t(m_data / (u256(1) << 64)) - 1, size_t(m_data));
+ return make_pair(size_t((data()) / (u256(1) << 64)) - 1, size_t(data()));
}
void AssemblyItem::setPushTagSubIdAndTag(size_t _subId, size_t _tag)
@@ -60,7 +60,7 @@ unsigned AssemblyItem::bytesRequired(unsigned _addressLength) const
case PushString:
return 33;
case Push:
- return 1 + max<unsigned>(1, dev::bytesRequired(m_data));
+ return 1 + max<unsigned>(1, dev::bytesRequired(data()));
case PushSubSize:
case PushProgramSize:
return 4; // worst case: a 16MB program
diff --git a/libevmasm/AssemblyItem.h b/libevmasm/AssemblyItem.h
index cddfb17c..002b3c87 100644
--- a/libevmasm/AssemblyItem.h
+++ b/libevmasm/AssemblyItem.h
@@ -59,16 +59,22 @@ public:
AssemblyItem(u256 _push, SourceLocation const& _location = SourceLocation()):
AssemblyItem(Push, _push, _location) { }
AssemblyItem(solidity::Instruction _i, SourceLocation const& _location = SourceLocation()):
- AssemblyItem(Operation, byte(_i), _location) { }
+ m_type(Operation),
+ m_instruction(_i),
+ m_location(_location)
+ {}
AssemblyItem(AssemblyItemType _type, u256 _data = 0, SourceLocation const& _location = SourceLocation()):
m_type(_type),
- m_data(_data),
m_location(_location)
{
+ if (m_type == Operation)
+ m_instruction = Instruction(byte(_data));
+ else
+ m_data = std::make_shared<u256>(_data);
}
- AssemblyItem tag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(Tag, m_data); }
- AssemblyItem pushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(PushTag, m_data); }
+ AssemblyItem tag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(Tag, data()); }
+ AssemblyItem pushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(PushTag, data()); }
/// Converts the tag to a subassembly tag. This has to be called in order to move a tag across assemblies.
/// @param _subId the identifier of the subassembly the tag is taken from.
AssemblyItem toSubAssemblyTag(size_t _subId) const;
@@ -79,18 +85,33 @@ public:
void setPushTagSubIdAndTag(size_t _subId, size_t _tag);
AssemblyItemType type() const { return m_type; }
- u256 const& data() const { return m_data; }
- void setType(AssemblyItemType const _type) { m_type = _type; }
- void setData(u256 const& _data) { m_data = _data; }
+ u256 const& data() const { assertThrow(m_type != Operation, Exception, ""); return *m_data; }
+ void setData(u256 const& _data) { assertThrow(m_type != Operation, Exception, ""); m_data = std::make_shared<u256>(_data); }
/// @returns the instruction of this item (only valid if type() == Operation)
- Instruction instruction() const { return Instruction(byte(m_data)); }
+ Instruction instruction() const { assertThrow(m_type == Operation, Exception, ""); return m_instruction; }
/// @returns true if the type and data of the items are equal.
- bool operator==(AssemblyItem const& _other) const { return m_type == _other.m_type && m_data == _other.m_data; }
+ bool operator==(AssemblyItem const& _other) const
+ {
+ if (type() != _other.type())
+ return false;
+ if (type() == Operation)
+ return instruction() == _other.instruction();
+ else
+ return data() == _other.data();
+ }
bool operator!=(AssemblyItem const& _other) const { return !operator==(_other); }
/// Less-than operator compatible with operator==.
- bool operator<(AssemblyItem const& _other) const { return std::tie(m_type, m_data) < std::tie(_other.m_type, _other.m_data); }
+ bool operator<(AssemblyItem const& _other) const
+ {
+ if (type() != _other.type())
+ return type() < _other.type();
+ else if (type() == Operation)
+ return instruction() < _other.instruction();
+ else
+ return data() < _other.data();
+ }
/// @returns an upper bound for the number of bytes required by this item, assuming that
/// the value of a jump tag takes @a _addressLength bytes.
@@ -100,7 +121,6 @@ public:
/// @returns true if the assembly item can be used in a functional context.
bool canBeFunctional() const;
- bool match(AssemblyItem const& _i) const { return _i.m_type == UndefinedItem || (m_type == _i.m_type && (m_type != Operation || m_data == _i.m_data)); }
void setLocation(SourceLocation const& _location) { m_location = _location; }
SourceLocation const& location() const { return m_location; }
@@ -115,7 +135,8 @@ public:
private:
AssemblyItemType m_type;
- u256 m_data;
+ Instruction m_instruction; ///< Only valid if m_type == Operation
+ std::shared_ptr<u256> m_data; ///< Only valid if m_type != Operation
SourceLocation m_location;
JumpType m_jumpType = JumpType::Ordinary;
/// Pushed value for operations with data to be determined during assembly stage,
diff --git a/libevmasm/CommonSubexpressionEliminator.cpp b/libevmasm/CommonSubexpressionEliminator.cpp
index 6294e579..fd4fffa6 100644
--- a/libevmasm/CommonSubexpressionEliminator.cpp
+++ b/libevmasm/CommonSubexpressionEliminator.cpp
@@ -303,7 +303,9 @@ void CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced)
for (auto it: m_classPositions)
for (auto p: it.second)
if (p > m_stackHeight)
+ {
assertThrow(false, OptimizerException, "");
+ }
// do some cleanup
removeStackTopIfPossible();
diff --git a/libevmasm/ConstantOptimiser.cpp b/libevmasm/ConstantOptimiser.cpp
index f4a50c2d..86244e17 100644
--- a/libevmasm/ConstantOptimiser.cpp
+++ b/libevmasm/ConstantOptimiser.cpp
@@ -38,6 +38,7 @@ unsigned ConstantOptimisationMethod::optimiseConstants(
for (AssemblyItem const& item: _items)
if (item.type() == Push)
pushes[item]++;
+ map<u256, AssemblyItems> pendingReplacements;
for (auto it: pushes)
{
AssemblyItem const& item = it.first;
@@ -53,17 +54,22 @@ unsigned ConstantOptimisationMethod::optimiseConstants(
bigint copyGas = copy.gasNeeded();
ComputeMethod compute(params, item.data());
bigint computeGas = compute.gasNeeded();
+ AssemblyItems replacement;
if (copyGas < literalGas && copyGas < computeGas)
{
- copy.execute(_assembly, _items);
+ replacement = copy.execute(_assembly);
optimisations++;
}
- else if (computeGas < literalGas && computeGas < copyGas)
+ else if (computeGas < literalGas && computeGas <= copyGas)
{
- compute.execute(_assembly, _items);
+ replacement = compute.execute(_assembly);
optimisations++;
}
+ if (!replacement.empty())
+ pendingReplacements[item.data()] = replacement;
}
+ if (!pendingReplacements.empty())
+ replaceConstants(_items, pendingReplacements);
return optimisations;
}
@@ -101,18 +107,24 @@ size_t ConstantOptimisationMethod::bytesRequired(AssemblyItems const& _items)
void ConstantOptimisationMethod::replaceConstants(
AssemblyItems& _items,
- AssemblyItems const& _replacement
-) const
+ map<u256, AssemblyItems> const& _replacements
+)
{
- assertThrow(_items.size() > 0, OptimizerException, "");
- for (size_t i = 0; i < _items.size(); ++i)
+ AssemblyItems replaced;
+ for (AssemblyItem const& item: _items)
{
- if (_items.at(i) != AssemblyItem(m_value))
- continue;
- _items[i] = _replacement[0];
- _items.insert(_items.begin() + i + 1, _replacement.begin() + 1, _replacement.end());
- i += _replacement.size() - 1;
+ if (item.type() == Push)
+ {
+ auto it = _replacements.find(item.data());
+ if (it != _replacements.end())
+ {
+ replaced += it->second;
+ continue;
+ }
+ }
+ replaced.push_back(item);
}
+ _items = std::move(replaced);
}
bigint LiteralMethod::gasNeeded()
@@ -128,38 +140,44 @@ bigint LiteralMethod::gasNeeded()
CodeCopyMethod::CodeCopyMethod(Params const& _params, u256 const& _value):
ConstantOptimisationMethod(_params, _value)
{
- m_copyRoutine = AssemblyItems{
- u256(0),
- Instruction::DUP1,
- Instruction::MLOAD, // back up memory
- u256(32),
- AssemblyItem(PushData, u256(1) << 16), // has to be replaced
- Instruction::DUP4,
- Instruction::CODECOPY,
- Instruction::DUP2,
- Instruction::MLOAD,
- Instruction::SWAP2,
- Instruction::MSTORE
- };
}
bigint CodeCopyMethod::gasNeeded()
{
return combineGas(
// Run gas: we ignore memory increase costs
- simpleRunGas(m_copyRoutine) + GasCosts::copyGas,
+ simpleRunGas(copyRoutine()) + GasCosts::copyGas,
// Data gas for copy routines: Some bytes are zero, but we ignore them.
- bytesRequired(m_copyRoutine) * (m_params.isCreation ? GasCosts::txDataNonZeroGas : GasCosts::createDataGas),
+ bytesRequired(copyRoutine()) * (m_params.isCreation ? GasCosts::txDataNonZeroGas : GasCosts::createDataGas),
// Data gas for data itself
dataGas(toBigEndian(m_value))
);
}
-void CodeCopyMethod::execute(Assembly& _assembly, AssemblyItems& _items)
+AssemblyItems CodeCopyMethod::execute(Assembly& _assembly)
{
bytes data = toBigEndian(m_value);
- m_copyRoutine[4] = _assembly.newData(data);
- replaceConstants(_items, m_copyRoutine);
+ AssemblyItems actualCopyRoutine = copyRoutine();
+ actualCopyRoutine[4] = _assembly.newData(data);
+ return actualCopyRoutine;
+}
+
+AssemblyItems const& CodeCopyMethod::copyRoutine() const
+{
+ AssemblyItems static copyRoutine{
+ u256(0),
+ Instruction::DUP1,
+ Instruction::MLOAD, // back up memory
+ u256(32),
+ AssemblyItem(PushData, u256(1) << 16), // has to be replaced
+ Instruction::DUP4,
+ Instruction::CODECOPY,
+ Instruction::DUP2,
+ Instruction::MLOAD,
+ Instruction::SWAP2,
+ Instruction::MSTORE
+ };
+ return copyRoutine;
}
AssemblyItems ComputeMethod::findRepresentation(u256 const& _value)
diff --git a/libevmasm/ConstantOptimiser.h b/libevmasm/ConstantOptimiser.h
index b35b2a69..dfa2fbf8 100644
--- a/libevmasm/ConstantOptimiser.h
+++ b/libevmasm/ConstantOptimiser.h
@@ -60,7 +60,10 @@ public:
explicit ConstantOptimisationMethod(Params const& _params, u256 const& _value):
m_params(_params), m_value(_value) {}
virtual bigint gasNeeded() = 0;
- virtual void execute(Assembly& _assembly, AssemblyItems& _items) = 0;
+ /// Executes the method, potentially appending to the assembly and returns a vector of
+ /// assembly items the constant should be relpaced with in one sweep.
+ /// If the vector is empty, the constants will not be deleted.
+ virtual AssemblyItems execute(Assembly& _assembly) = 0;
protected:
size_t dataSize() const { return std::max<size_t>(1, dev::bytesRequired(m_value)); }
@@ -83,8 +86,8 @@ protected:
return m_params.runs * _runGas + m_params.multiplicity * _repeatedDataGas + _uniqueDataGas;
}
- /// Replaces the constant by the code given in @a _replacement.
- void replaceConstants(AssemblyItems& _items, AssemblyItems const& _replacement) const;
+ /// Replaces all constants i by the code given in @a _replacement[i].
+ static void replaceConstants(AssemblyItems& _items, std::map<u256, AssemblyItems> const& _replacement);
Params m_params;
u256 const& m_value;
@@ -100,7 +103,7 @@ public:
explicit LiteralMethod(Params const& _params, u256 const& _value):
ConstantOptimisationMethod(_params, _value) {}
virtual bigint gasNeeded() override;
- virtual void execute(Assembly&, AssemblyItems&) override {}
+ virtual AssemblyItems execute(Assembly&) override { return AssemblyItems{}; }
};
/**
@@ -111,10 +114,10 @@ class CodeCopyMethod: public ConstantOptimisationMethod
public:
explicit CodeCopyMethod(Params const& _params, u256 const& _value);
virtual bigint gasNeeded() override;
- virtual void execute(Assembly& _assembly, AssemblyItems& _items) override;
+ virtual AssemblyItems execute(Assembly& _assembly) override;
protected:
- AssemblyItems m_copyRoutine;
+ AssemblyItems const& copyRoutine() const;
};
/**
@@ -130,9 +133,9 @@ public:
}
virtual bigint gasNeeded() override { return gasNeeded(m_routine); }
- virtual void execute(Assembly&, AssemblyItems& _items) override
+ virtual AssemblyItems execute(Assembly&) override
{
- replaceConstants(_items, m_routine);
+ return m_routine;
}
protected:
diff --git a/libevmasm/ExpressionClasses.cpp b/libevmasm/ExpressionClasses.cpp
index d5ccd7e3..fc283b0b 100644
--- a/libevmasm/ExpressionClasses.cpp
+++ b/libevmasm/ExpressionClasses.cpp
@@ -29,6 +29,7 @@
#include <boost/noncopyable.hpp>
#include <libevmasm/Assembly.h>
#include <libevmasm/CommonSubexpressionEliminator.h>
+#include <libevmasm/SimplificationRules.h>
using namespace std;
using namespace dev;
@@ -40,8 +41,18 @@ bool ExpressionClasses::Expression::operator<(ExpressionClasses::Expression cons
assertThrow(!!item && !!_other.item, OptimizerException, "");
auto type = item->type();
auto otherType = _other.item->type();
- return std::tie(type, item->data(), arguments, sequenceNumber) <
- std::tie(otherType, _other.item->data(), _other.arguments, _other.sequenceNumber);
+ if (type != otherType)
+ return type < otherType;
+ else if (type == Operation)
+ {
+ auto instr = item->instruction();
+ auto otherInstr = _other.item->instruction();
+ return std::tie(instr, arguments, sequenceNumber) <
+ std::tie(otherInstr, _other.arguments, _other.sequenceNumber);
+ }
+ else
+ return std::tie(item->data(), arguments, sequenceNumber) <
+ std::tie(_other.item->data(), _other.arguments, _other.sequenceNumber);
}
ExpressionClasses::Id ExpressionClasses::find(
@@ -170,191 +181,6 @@ string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) const
return str.str();
}
-class Rules: public boost::noncopyable
-{
-public:
- Rules();
- void resetMatchGroups() { m_matchGroups.clear(); }
- vector<pair<Pattern, function<Pattern()>>> rules() const { return m_rules; }
-
-private:
- using Expression = ExpressionClasses::Expression;
- map<unsigned, Expression const*> m_matchGroups;
- vector<pair<Pattern, function<Pattern()>>> m_rules;
-};
-
-template <class S> S divWorkaround(S const& _a, S const& _b)
-{
- return (S)(bigint(_a) / bigint(_b));
-}
-
-template <class S> S modWorkaround(S const& _a, S const& _b)
-{
- return (S)(bigint(_a) % bigint(_b));
-}
-
-Rules::Rules()
-{
- // Multiple occurences of one of these inside one rule must match the same equivalence class.
- // Constants.
- Pattern A(Push);
- Pattern B(Push);
- Pattern C(Push);
- // Anything.
- Pattern X;
- Pattern Y;
- Pattern Z;
- A.setMatchGroup(1, m_matchGroups);
- B.setMatchGroup(2, m_matchGroups);
- C.setMatchGroup(3, m_matchGroups);
- X.setMatchGroup(4, m_matchGroups);
- Y.setMatchGroup(5, m_matchGroups);
- Z.setMatchGroup(6, m_matchGroups);
-
- m_rules = vector<pair<Pattern, function<Pattern()>>>{
- // arithmetics on constants
- {{Instruction::ADD, {A, B}}, [=]{ return A.d() + B.d(); }},
- {{Instruction::MUL, {A, B}}, [=]{ return A.d() * B.d(); }},
- {{Instruction::SUB, {A, B}}, [=]{ return A.d() - B.d(); }},
- {{Instruction::DIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : divWorkaround(A.d(), B.d()); }},
- {{Instruction::SDIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(divWorkaround(u2s(A.d()), u2s(B.d()))); }},
- {{Instruction::MOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : modWorkaround(A.d(), B.d()); }},
- {{Instruction::SMOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(modWorkaround(u2s(A.d()), u2s(B.d()))); }},
- {{Instruction::EXP, {A, B}}, [=]{ return u256(boost::multiprecision::powm(bigint(A.d()), bigint(B.d()), bigint(1) << 256)); }},
- {{Instruction::NOT, {A}}, [=]{ return ~A.d(); }},
- {{Instruction::LT, {A, B}}, [=]() { return A.d() < B.d() ? u256(1) : 0; }},
- {{Instruction::GT, {A, B}}, [=]() -> u256 { return A.d() > B.d() ? 1 : 0; }},
- {{Instruction::SLT, {A, B}}, [=]() -> u256 { return u2s(A.d()) < u2s(B.d()) ? 1 : 0; }},
- {{Instruction::SGT, {A, B}}, [=]() -> u256 { return u2s(A.d()) > u2s(B.d()) ? 1 : 0; }},
- {{Instruction::EQ, {A, B}}, [=]() -> u256 { return A.d() == B.d() ? 1 : 0; }},
- {{Instruction::ISZERO, {A}}, [=]() -> u256 { return A.d() == 0 ? 1 : 0; }},
- {{Instruction::AND, {A, B}}, [=]{ return A.d() & B.d(); }},
- {{Instruction::OR, {A, B}}, [=]{ return A.d() | B.d(); }},
- {{Instruction::XOR, {A, B}}, [=]{ return A.d() ^ B.d(); }},
- {{Instruction::BYTE, {A, B}}, [=]{ return A.d() >= 32 ? 0 : (B.d() >> unsigned(8 * (31 - A.d()))) & 0xff; }},
- {{Instruction::ADDMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) + bigint(B.d())) % C.d()); }},
- {{Instruction::MULMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) * bigint(B.d())) % C.d()); }},
- {{Instruction::MULMOD, {A, B, C}}, [=]{ return A.d() * B.d(); }},
- {{Instruction::SIGNEXTEND, {A, B}}, [=]() -> u256 {
- if (A.d() >= 31)
- return B.d();
- unsigned testBit = unsigned(A.d()) * 8 + 7;
- u256 mask = (u256(1) << testBit) - 1;
- return u256(boost::multiprecision::bit_test(B.d(), testBit) ? B.d() | ~mask : B.d() & mask);
- }},
-
- // invariants involving known constants
- {{Instruction::ADD, {X, 0}}, [=]{ return X; }},
- {{Instruction::SUB, {X, 0}}, [=]{ return X; }},
- {{Instruction::MUL, {X, 1}}, [=]{ return X; }},
- {{Instruction::DIV, {X, 1}}, [=]{ return X; }},
- {{Instruction::SDIV, {X, 1}}, [=]{ return X; }},
- {{Instruction::OR, {X, 0}}, [=]{ return X; }},
- {{Instruction::XOR, {X, 0}}, [=]{ return X; }},
- {{Instruction::AND, {X, ~u256(0)}}, [=]{ return X; }},
- {{Instruction::AND, {X, 0}}, [=]{ return u256(0); }},
- {{Instruction::MUL, {X, 0}}, [=]{ return u256(0); }},
- {{Instruction::DIV, {X, 0}}, [=]{ return u256(0); }},
- {{Instruction::DIV, {0, X}}, [=]{ return u256(0); }},
- {{Instruction::MOD, {X, 0}}, [=]{ return u256(0); }},
- {{Instruction::MOD, {0, X}}, [=]{ return u256(0); }},
- {{Instruction::OR, {X, ~u256(0)}}, [=]{ return ~u256(0); }},
- {{Instruction::EQ, {X, 0}}, [=]() -> Pattern { return {Instruction::ISZERO, {X}}; } },
- // operations involving an expression and itself
- {{Instruction::AND, {X, X}}, [=]{ return X; }},
- {{Instruction::OR, {X, X}}, [=]{ return X; }},
- {{Instruction::XOR, {X, X}}, [=]{ return u256(0); }},
- {{Instruction::SUB, {X, X}}, [=]{ return u256(0); }},
- {{Instruction::EQ, {X, X}}, [=]{ return u256(1); }},
- {{Instruction::LT, {X, X}}, [=]{ return u256(0); }},
- {{Instruction::SLT, {X, X}}, [=]{ return u256(0); }},
- {{Instruction::GT, {X, X}}, [=]{ return u256(0); }},
- {{Instruction::SGT, {X, X}}, [=]{ return u256(0); }},
- {{Instruction::MOD, {X, X}}, [=]{ return u256(0); }},
-
- {{Instruction::NOT, {{Instruction::NOT, {X}}}}, [=]{ return X; }},
- {{Instruction::XOR, {{{X}, {Instruction::XOR, {X, Y}}}}}, [=]{ return Y; }},
- {{Instruction::OR, {{{X}, {Instruction::AND, {X, Y}}}}}, [=]{ return X; }},
- {{Instruction::AND, {{{X}, {Instruction::OR, {X, Y}}}}}, [=]{ return X; }},
- {{Instruction::AND, {{{X}, {Instruction::NOT, {X}}}}}, [=]{ return u256(0); }},
- {{Instruction::OR, {{{X}, {Instruction::NOT, {X}}}}}, [=]{ return ~u256(0); }},
- };
- // Double negation of opcodes with binary result
- for (auto const& op: vector<Instruction>{
- Instruction::EQ,
- Instruction::LT,
- Instruction::SLT,
- Instruction::GT,
- Instruction::SGT
- })
- m_rules.push_back({
- {Instruction::ISZERO, {{Instruction::ISZERO, {{op, {X, Y}}}}}},
- [=]() -> Pattern { return {op, {X, Y}}; }
- });
- m_rules.push_back({
- {Instruction::ISZERO, {{Instruction::ISZERO, {{Instruction::ISZERO, {X}}}}}},
- [=]() -> Pattern { return {Instruction::ISZERO, {X}}; }
- });
- m_rules.push_back({
- {Instruction::ISZERO, {{Instruction::XOR, {X, Y}}}},
- [=]() -> Pattern { return { Instruction::EQ, {X, Y} }; }
- });
- // Associative operations
- for (auto const& opFun: vector<pair<Instruction,function<u256(u256 const&,u256 const&)>>>{
- {Instruction::ADD, plus<u256>()},
- {Instruction::MUL, multiplies<u256>()},
- {Instruction::AND, bit_and<u256>()},
- {Instruction::OR, bit_or<u256>()},
- {Instruction::XOR, bit_xor<u256>()}
- })
- {
- auto op = opFun.first;
- auto fun = opFun.second;
- // Moving constants to the outside, order matters here!
- // we need actions that return expressions (or patterns?) here, and we need also reversed rules
- // (X+A)+B -> X+(A+B)
- m_rules += vector<pair<Pattern, function<Pattern()>>>{{
- {op, {{op, {X, A}}, B}},
- [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; }
- }, {
- // X+(Y+A) -> (X+Y)+A
- {op, {{op, {X, A}}, Y}},
- [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; }
- }, {
- // For now, we still need explicit commutativity for the inner pattern
- {op, {{op, {A, X}}, B}},
- [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; }
- }, {
- {op, {{op, {A, X}}, Y}},
- [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; }
- }};
- }
- // move constants across subtractions
- m_rules += vector<pair<Pattern, function<Pattern()>>>{
- {
- // X - A -> X + (-A)
- {Instruction::SUB, {X, A}},
- [=]() -> Pattern { return {Instruction::ADD, {X, 0 - A.d()}}; }
- }, {
- // (X + A) - Y -> (X - Y) + A
- {Instruction::SUB, {{Instruction::ADD, {X, A}}, Y}},
- [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; }
- }, {
- // (A + X) - Y -> (X - Y) + A
- {Instruction::SUB, {{Instruction::ADD, {A, X}}, Y}},
- [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; }
- }, {
- // X - (Y + A) -> (X - Y) + (-A)
- {Instruction::SUB, {X, {Instruction::ADD, {Y, A}}}},
- [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; }
- }, {
- // X - (A + Y) -> (X - Y) + (-A)
- {Instruction::SUB, {X, {Instruction::ADD, {A, Y}}}},
- [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; }
- }
- };
-}
-
ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, bool _secondRun)
{
static Rules rules;
@@ -366,21 +192,17 @@ ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr,
)
return -1;
- for (auto const& rule: rules.rules())
+ if (auto match = rules.findFirstMatch(_expr, *this))
{
- rules.resetMatchGroups();
- if (rule.first.matches(_expr, *this))
- {
- // Debug info
- //cout << "Simplifying " << *_expr.item << "(";
- //for (Id arg: _expr.arguments)
- // cout << fullDAGToString(arg) << ", ";
- //cout << ")" << endl;
- //cout << "with rule " << rule.first.toString() << endl;
- //ExpressionTemplate t(rule.second());
- //cout << "to " << rule.second().toString() << endl;
- return rebuildExpression(ExpressionTemplate(rule.second(), _expr.item->location()));
- }
+ // Debug info
+ //cout << "Simplifying " << *_expr.item << "(";
+ //for (Id arg: _expr.arguments)
+ // cout << fullDAGToString(arg) << ", ";
+ //cout << ")" << endl;
+ //cout << "with rule " << match->first.toString() << endl;
+ //ExpressionTemplate t(match->second());
+ //cout << "to " << match->second().toString() << endl;
+ return rebuildExpression(ExpressionTemplate(match->second(), _expr.item->location()));
}
if (!_secondRun && _expr.arguments.size() == 2 && SemanticInformation::isCommutativeOperation(*_expr.item))
@@ -403,122 +225,3 @@ ExpressionClasses::Id ExpressionClasses::rebuildExpression(ExpressionTemplate co
arguments.push_back(rebuildExpression(t));
return find(_template.item, arguments);
}
-
-
-Pattern::Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments):
- m_type(Operation),
- m_requireDataMatch(true),
- m_data(_instruction),
- m_arguments(_arguments)
-{
-}
-
-void Pattern::setMatchGroup(unsigned _group, map<unsigned, Expression const*>& _matchGroups)
-{
- m_matchGroup = _group;
- m_matchGroups = &_matchGroups;
-}
-
-bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const
-{
- if (!matchesBaseItem(_expr.item))
- return false;
- if (m_matchGroup)
- {
- if (!m_matchGroups->count(m_matchGroup))
- (*m_matchGroups)[m_matchGroup] = &_expr;
- else if ((*m_matchGroups)[m_matchGroup]->id != _expr.id)
- return false;
- }
- assertThrow(m_arguments.size() == 0 || _expr.arguments.size() == m_arguments.size(), OptimizerException, "");
- for (size_t i = 0; i < m_arguments.size(); ++i)
- if (!m_arguments[i].matches(_classes.representative(_expr.arguments[i]), _classes))
- return false;
- return true;
-}
-
-AssemblyItem Pattern::toAssemblyItem(SourceLocation const& _location) const
-{
- return AssemblyItem(m_type, m_data, _location);
-}
-
-string Pattern::toString() const
-{
- stringstream s;
- switch (m_type)
- {
- case Operation:
- s << instructionInfo(Instruction(unsigned(m_data))).name;
- break;
- case Push:
- s << "PUSH " << hex << m_data;
- break;
- case UndefinedItem:
- s << "ANY";
- break;
- default:
- s << "t=" << dec << m_type << " d=" << hex << m_data;
- break;
- }
- if (!m_requireDataMatch)
- s << " ~";
- if (m_matchGroup)
- s << "[" << dec << m_matchGroup << "]";
- s << "(";
- for (Pattern const& p: m_arguments)
- s << p.toString() << ", ";
- s << ")";
- return s.str();
-}
-
-bool Pattern::matchesBaseItem(AssemblyItem const* _item) const
-{
- if (m_type == UndefinedItem)
- return true;
- if (!_item)
- return false;
- if (m_type != _item->type())
- return false;
- if (m_requireDataMatch && m_data != _item->data())
- return false;
- return true;
-}
-
-Pattern::Expression const& Pattern::matchGroupValue() const
-{
- assertThrow(m_matchGroup > 0, OptimizerException, "");
- assertThrow(!!m_matchGroups, OptimizerException, "");
- assertThrow((*m_matchGroups)[m_matchGroup], OptimizerException, "");
- return *(*m_matchGroups)[m_matchGroup];
-}
-
-
-ExpressionTemplate::ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location)
-{
- if (_pattern.matchGroup())
- {
- hasId = true;
- id = _pattern.id();
- }
- else
- {
- hasId = false;
- item = _pattern.toAssemblyItem(_location);
- }
- for (auto const& arg: _pattern.arguments())
- arguments.push_back(ExpressionTemplate(arg, _location));
-}
-
-string ExpressionTemplate::toString() const
-{
- stringstream s;
- if (hasId)
- s << id;
- else
- s << item;
- s << "(";
- for (auto const& arg: arguments)
- s << arg.toString();
- s << ")";
- return s.str();
-}
diff --git a/libevmasm/ExpressionClasses.h b/libevmasm/ExpressionClasses.h
index 11a698dd..5d53b292 100644
--- a/libevmasm/ExpressionClasses.h
+++ b/libevmasm/ExpressionClasses.h
@@ -121,70 +121,5 @@ private:
std::vector<std::shared_ptr<AssemblyItem>> m_spareAssemblyItems;
};
-/**
- * Pattern to match against an expression.
- * Also stores matched expressions to retrieve them later, for constructing new expressions using
- * ExpressionTemplate.
- */
-class Pattern
-{
-public:
- using Expression = ExpressionClasses::Expression;
- using Id = ExpressionClasses::Id;
-
- // Matches a specific constant value.
- Pattern(unsigned _value): Pattern(u256(_value)) {}
- // Matches a specific constant value.
- Pattern(u256 const& _value): m_type(Push), m_requireDataMatch(true), m_data(_value) {}
- // Matches a specific assembly item type or anything if not given.
- Pattern(AssemblyItemType _type = UndefinedItem): m_type(_type) {}
- // Matches a given instruction with given arguments
- Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments = {});
- /// Sets this pattern to be part of the match group with the identifier @a _group.
- /// Inside one rule, all patterns in the same match group have to match expressions from the
- /// same expression equivalence class.
- void setMatchGroup(unsigned _group, std::map<unsigned, Expression const*>& _matchGroups);
- unsigned matchGroup() const { return m_matchGroup; }
- bool matches(Expression const& _expr, ExpressionClasses const& _classes) const;
-
- AssemblyItem toAssemblyItem(SourceLocation const& _location) const;
- std::vector<Pattern> arguments() const { return m_arguments; }
-
- /// @returns the id of the matched expression if this pattern is part of a match group.
- Id id() const { return matchGroupValue().id; }
- /// @returns the data of the matched expression if this pattern is part of a match group.
- u256 const& d() const { return matchGroupValue().item->data(); }
-
- std::string toString() const;
-
-private:
- bool matchesBaseItem(AssemblyItem const* _item) const;
- Expression const& matchGroupValue() const;
-
- AssemblyItemType m_type;
- bool m_requireDataMatch = false;
- u256 m_data = 0;
- std::vector<Pattern> m_arguments;
- unsigned m_matchGroup = 0;
- std::map<unsigned, Expression const*>* m_matchGroups = nullptr;
-};
-
-/**
- * Template for a new expression that can be built from matched patterns.
- */
-struct ExpressionTemplate
-{
- using Expression = ExpressionClasses::Expression;
- using Id = ExpressionClasses::Id;
- explicit ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location);
- std::string toString() const;
- bool hasId = false;
- /// Id of the matched expression, if available.
- Id id = Id(-1);
- // Otherwise, assembly item.
- AssemblyItem item = UndefinedItem;
- std::vector<ExpressionTemplate> arguments;
-};
-
}
}
diff --git a/libevmasm/SimplificationRules.cpp b/libevmasm/SimplificationRules.cpp
new file mode 100644
index 00000000..2976d95f
--- /dev/null
+++ b/libevmasm/SimplificationRules.cpp
@@ -0,0 +1,370 @@
+/*
+ This file is part of solidity.
+
+ solidity is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ solidity is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with solidity. If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file ExpressionClasses.cpp
+ * @author Christian <c@ethdev.com>
+ * @date 2015
+ * Container for equivalence classes of expressions for use in common subexpression elimination.
+ */
+
+#include <libevmasm/ExpressionClasses.h>
+#include <utility>
+#include <tuple>
+#include <functional>
+#include <boost/range/adaptor/reversed.hpp>
+#include <boost/noncopyable.hpp>
+#include <libevmasm/Assembly.h>
+#include <libevmasm/CommonSubexpressionEliminator.h>
+#include <libevmasm/SimplificationRules.h>
+
+using namespace std;
+using namespace dev;
+using namespace dev::eth;
+
+
+pair<Pattern, function<Pattern()> > const* Rules::findFirstMatch(
+ Expression const& _expr,
+ ExpressionClasses const& _classes
+)
+{
+ resetMatchGroups();
+
+ assertThrow(_expr.item, OptimizerException, "");
+ for (auto const& rule: m_rules[byte(_expr.item->instruction())])
+ {
+ if (rule.first.matches(_expr, _classes))
+ return &rule;
+ resetMatchGroups();
+ }
+ return nullptr;
+}
+
+void Rules::addRules(std::vector<std::pair<Pattern, std::function<Pattern ()> > > const& _rules)
+{
+ for (auto const& r: _rules)
+ addRule(r);
+}
+
+void Rules::addRule(std::pair<Pattern, std::function<Pattern()> > const& _rule)
+{
+ m_rules[byte(_rule.first.instruction())].push_back(_rule);
+}
+
+template <class S> S divWorkaround(S const& _a, S const& _b)
+{
+ return (S)(bigint(_a) / bigint(_b));
+}
+
+template <class S> S modWorkaround(S const& _a, S const& _b)
+{
+ return (S)(bigint(_a) % bigint(_b));
+}
+
+Rules::Rules()
+{
+ // Multiple occurences of one of these inside one rule must match the same equivalence class.
+ // Constants.
+ Pattern A(Push);
+ Pattern B(Push);
+ Pattern C(Push);
+ // Anything.
+ Pattern X;
+ Pattern Y;
+ Pattern Z;
+ A.setMatchGroup(1, m_matchGroups);
+ B.setMatchGroup(2, m_matchGroups);
+ C.setMatchGroup(3, m_matchGroups);
+ X.setMatchGroup(4, m_matchGroups);
+ Y.setMatchGroup(5, m_matchGroups);
+ Z.setMatchGroup(6, m_matchGroups);
+
+ addRules(vector<pair<Pattern, function<Pattern()>>>{
+ // arithmetics on constants
+ {{Instruction::ADD, {A, B}}, [=]{ return A.d() + B.d(); }},
+ {{Instruction::MUL, {A, B}}, [=]{ return A.d() * B.d(); }},
+ {{Instruction::SUB, {A, B}}, [=]{ return A.d() - B.d(); }},
+ {{Instruction::DIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : divWorkaround(A.d(), B.d()); }},
+ {{Instruction::SDIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(divWorkaround(u2s(A.d()), u2s(B.d()))); }},
+ {{Instruction::MOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : modWorkaround(A.d(), B.d()); }},
+ {{Instruction::SMOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(modWorkaround(u2s(A.d()), u2s(B.d()))); }},
+ {{Instruction::EXP, {A, B}}, [=]{ return u256(boost::multiprecision::powm(bigint(A.d()), bigint(B.d()), bigint(1) << 256)); }},
+ {{Instruction::NOT, {A}}, [=]{ return ~A.d(); }},
+ {{Instruction::LT, {A, B}}, [=]() { return A.d() < B.d() ? u256(1) : 0; }},
+ {{Instruction::GT, {A, B}}, [=]() -> u256 { return A.d() > B.d() ? 1 : 0; }},
+ {{Instruction::SLT, {A, B}}, [=]() -> u256 { return u2s(A.d()) < u2s(B.d()) ? 1 : 0; }},
+ {{Instruction::SGT, {A, B}}, [=]() -> u256 { return u2s(A.d()) > u2s(B.d()) ? 1 : 0; }},
+ {{Instruction::EQ, {A, B}}, [=]() -> u256 { return A.d() == B.d() ? 1 : 0; }},
+ {{Instruction::ISZERO, {A}}, [=]() -> u256 { return A.d() == 0 ? 1 : 0; }},
+ {{Instruction::AND, {A, B}}, [=]{ return A.d() & B.d(); }},
+ {{Instruction::OR, {A, B}}, [=]{ return A.d() | B.d(); }},
+ {{Instruction::XOR, {A, B}}, [=]{ return A.d() ^ B.d(); }},
+ {{Instruction::BYTE, {A, B}}, [=]{ return A.d() >= 32 ? 0 : (B.d() >> unsigned(8 * (31 - A.d()))) & 0xff; }},
+ {{Instruction::ADDMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) + bigint(B.d())) % C.d()); }},
+ {{Instruction::MULMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) * bigint(B.d())) % C.d()); }},
+ {{Instruction::MULMOD, {A, B, C}}, [=]{ return A.d() * B.d(); }},
+ {{Instruction::SIGNEXTEND, {A, B}}, [=]() -> u256 {
+ if (A.d() >= 31)
+ return B.d();
+ unsigned testBit = unsigned(A.d()) * 8 + 7;
+ u256 mask = (u256(1) << testBit) - 1;
+ return u256(boost::multiprecision::bit_test(B.d(), testBit) ? B.d() | ~mask : B.d() & mask);
+ }},
+
+ // invariants involving known constants
+ {{Instruction::ADD, {X, 0}}, [=]{ return X; }},
+ {{Instruction::SUB, {X, 0}}, [=]{ return X; }},
+ {{Instruction::MUL, {X, 1}}, [=]{ return X; }},
+ {{Instruction::DIV, {X, 1}}, [=]{ return X; }},
+ {{Instruction::SDIV, {X, 1}}, [=]{ return X; }},
+ {{Instruction::OR, {X, 0}}, [=]{ return X; }},
+ {{Instruction::XOR, {X, 0}}, [=]{ return X; }},
+ {{Instruction::AND, {X, ~u256(0)}}, [=]{ return X; }},
+ {{Instruction::AND, {X, 0}}, [=]{ return u256(0); }},
+ {{Instruction::MUL, {X, 0}}, [=]{ return u256(0); }},
+ {{Instruction::DIV, {X, 0}}, [=]{ return u256(0); }},
+ {{Instruction::DIV, {0, X}}, [=]{ return u256(0); }},
+ {{Instruction::MOD, {X, 0}}, [=]{ return u256(0); }},
+ {{Instruction::MOD, {0, X}}, [=]{ return u256(0); }},
+ {{Instruction::OR, {X, ~u256(0)}}, [=]{ return ~u256(0); }},
+ {{Instruction::EQ, {X, 0}}, [=]() -> Pattern { return {Instruction::ISZERO, {X}}; } },
+ // operations involving an expression and itself
+ {{Instruction::AND, {X, X}}, [=]{ return X; }},
+ {{Instruction::OR, {X, X}}, [=]{ return X; }},
+ {{Instruction::XOR, {X, X}}, [=]{ return u256(0); }},
+ {{Instruction::SUB, {X, X}}, [=]{ return u256(0); }},
+ {{Instruction::EQ, {X, X}}, [=]{ return u256(1); }},
+ {{Instruction::LT, {X, X}}, [=]{ return u256(0); }},
+ {{Instruction::SLT, {X, X}}, [=]{ return u256(0); }},
+ {{Instruction::GT, {X, X}}, [=]{ return u256(0); }},
+ {{Instruction::SGT, {X, X}}, [=]{ return u256(0); }},
+ {{Instruction::MOD, {X, X}}, [=]{ return u256(0); }},
+
+ {{Instruction::NOT, {{Instruction::NOT, {X}}}}, [=]{ return X; }},
+ {{Instruction::XOR, {{{X}, {Instruction::XOR, {X, Y}}}}}, [=]{ return Y; }},
+ {{Instruction::OR, {{{X}, {Instruction::AND, {X, Y}}}}}, [=]{ return X; }},
+ {{Instruction::AND, {{{X}, {Instruction::OR, {X, Y}}}}}, [=]{ return X; }},
+ {{Instruction::AND, {{{X}, {Instruction::NOT, {X}}}}}, [=]{ return u256(0); }},
+ {{Instruction::OR, {{{X}, {Instruction::NOT, {X}}}}}, [=]{ return ~u256(0); }},
+ });
+ // Double negation of opcodes with binary result
+ for (auto const& op: vector<Instruction>{
+ Instruction::EQ,
+ Instruction::LT,
+ Instruction::SLT,
+ Instruction::GT,
+ Instruction::SGT
+ })
+ addRule({
+ {Instruction::ISZERO, {{Instruction::ISZERO, {{op, {X, Y}}}}}},
+ [=]() -> Pattern { return {op, {X, Y}}; }
+ });
+ addRule({
+ {Instruction::ISZERO, {{Instruction::ISZERO, {{Instruction::ISZERO, {X}}}}}},
+ [=]() -> Pattern { return {Instruction::ISZERO, {X}}; }
+ });
+ addRule({
+ {Instruction::ISZERO, {{Instruction::XOR, {X, Y}}}},
+ [=]() -> Pattern { return { Instruction::EQ, {X, Y} }; }
+ });
+ // Associative operations
+ for (auto const& opFun: vector<pair<Instruction,function<u256(u256 const&,u256 const&)>>>{
+ {Instruction::ADD, plus<u256>()},
+ {Instruction::MUL, multiplies<u256>()},
+ {Instruction::AND, bit_and<u256>()},
+ {Instruction::OR, bit_or<u256>()},
+ {Instruction::XOR, bit_xor<u256>()}
+ })
+ {
+ auto op = opFun.first;
+ auto fun = opFun.second;
+ // Moving constants to the outside, order matters here!
+ // we need actions that return expressions (or patterns?) here, and we need also reversed rules
+ // (X+A)+B -> X+(A+B)
+ addRules(vector<pair<Pattern, function<Pattern()>>>{{
+ {op, {{op, {X, A}}, B}},
+ [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; }
+ }, {
+ // X+(Y+A) -> (X+Y)+A
+ {op, {{op, {X, A}}, Y}},
+ [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; }
+ }, {
+ // For now, we still need explicit commutativity for the inner pattern
+ {op, {{op, {A, X}}, B}},
+ [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; }
+ }, {
+ {op, {{op, {A, X}}, Y}},
+ [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; }
+ }});
+ }
+ // move constants across subtractions
+ addRules(vector<pair<Pattern, function<Pattern()>>>{
+ {
+ // X - A -> X + (-A)
+ {Instruction::SUB, {X, A}},
+ [=]() -> Pattern { return {Instruction::ADD, {X, 0 - A.d()}}; }
+ }, {
+ // (X + A) - Y -> (X - Y) + A
+ {Instruction::SUB, {{Instruction::ADD, {X, A}}, Y}},
+ [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; }
+ }, {
+ // (A + X) - Y -> (X - Y) + A
+ {Instruction::SUB, {{Instruction::ADD, {A, X}}, Y}},
+ [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; }
+ }, {
+ // X - (Y + A) -> (X - Y) + (-A)
+ {Instruction::SUB, {X, {Instruction::ADD, {Y, A}}}},
+ [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; }
+ }, {
+ // X - (A + Y) -> (X - Y) + (-A)
+ {Instruction::SUB, {X, {Instruction::ADD, {A, Y}}}},
+ [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; }
+ }
+ });
+}
+
+Pattern::Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments):
+ m_type(Operation),
+ m_instruction(_instruction),
+ m_arguments(_arguments)
+{
+}
+
+void Pattern::setMatchGroup(unsigned _group, map<unsigned, Expression const*>& _matchGroups)
+{
+ m_matchGroup = _group;
+ m_matchGroups = &_matchGroups;
+}
+
+bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const
+{
+ if (!matchesBaseItem(_expr.item))
+ return false;
+ if (m_matchGroup)
+ {
+ if (!m_matchGroups->count(m_matchGroup))
+ (*m_matchGroups)[m_matchGroup] = &_expr;
+ else if ((*m_matchGroups)[m_matchGroup]->id != _expr.id)
+ return false;
+ }
+ assertThrow(m_arguments.size() == 0 || _expr.arguments.size() == m_arguments.size(), OptimizerException, "");
+ for (size_t i = 0; i < m_arguments.size(); ++i)
+ if (!m_arguments[i].matches(_classes.representative(_expr.arguments[i]), _classes))
+ return false;
+ return true;
+}
+
+AssemblyItem Pattern::toAssemblyItem(SourceLocation const& _location) const
+{
+ if (m_type == Operation)
+ return AssemblyItem(m_instruction, _location);
+ else
+ return AssemblyItem(m_type, data(), _location);
+}
+
+string Pattern::toString() const
+{
+ stringstream s;
+ switch (m_type)
+ {
+ case Operation:
+ s << instructionInfo(m_instruction).name;
+ break;
+ case Push:
+ if (m_data)
+ s << "PUSH " << hex << data();
+ else
+ s << "PUSH ";
+ break;
+ case UndefinedItem:
+ s << "ANY";
+ break;
+ default:
+ if (m_data)
+ s << "t=" << dec << m_type << " d=" << hex << data();
+ else
+ s << "t=" << dec << m_type << " d: nullptr";
+ break;
+ }
+ if (!m_requireDataMatch)
+ s << " ~";
+ if (m_matchGroup)
+ s << "[" << dec << m_matchGroup << "]";
+ s << "(";
+ for (Pattern const& p: m_arguments)
+ s << p.toString() << ", ";
+ s << ")";
+ return s.str();
+}
+
+bool Pattern::matchesBaseItem(AssemblyItem const* _item) const
+{
+ if (m_type == UndefinedItem)
+ return true;
+ if (!_item)
+ return false;
+ if (m_type != _item->type())
+ return false;
+ else if (m_type == Operation)
+ return m_instruction == _item->instruction();
+ else if (m_requireDataMatch)
+ return data() == _item->data();
+ return true;
+}
+
+Pattern::Expression const& Pattern::matchGroupValue() const
+{
+ assertThrow(m_matchGroup > 0, OptimizerException, "");
+ assertThrow(!!m_matchGroups, OptimizerException, "");
+ assertThrow((*m_matchGroups)[m_matchGroup], OptimizerException, "");
+ return *(*m_matchGroups)[m_matchGroup];
+}
+
+u256 const& Pattern::data() const
+{
+ assertThrow(m_data, OptimizerException, "");
+ return *m_data;
+}
+
+ExpressionTemplate::ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location)
+{
+ if (_pattern.matchGroup())
+ {
+ hasId = true;
+ id = _pattern.id();
+ }
+ else
+ {
+ hasId = false;
+ item = _pattern.toAssemblyItem(_location);
+ }
+ for (auto const& arg: _pattern.arguments())
+ arguments.push_back(ExpressionTemplate(arg, _location));
+}
+
+string ExpressionTemplate::toString() const
+{
+ stringstream s;
+ if (hasId)
+ s << id;
+ else
+ s << item;
+ s << "(";
+ for (auto const& arg: arguments)
+ s << arg.toString();
+ s << ")";
+ return s.str();
+}
diff --git a/libevmasm/SimplificationRules.h b/libevmasm/SimplificationRules.h
new file mode 100644
index 00000000..a4da5849
--- /dev/null
+++ b/libevmasm/SimplificationRules.h
@@ -0,0 +1,140 @@
+/*
+ This file is part of solidity.
+
+ solidity is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ solidity is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with solidity. If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file SimplificationRules
+ * @author Christian <chris@ethereum.org>
+ * @date 2017
+ * Module for applying replacement rules against Expressions.
+ */
+
+#pragma once
+
+#include <libevmasm/ExpressionClasses.h>
+
+#include <functional>
+#include <vector>
+
+namespace dev
+{
+namespace eth
+{
+
+class Pattern;
+
+/**
+ * Container for all simplification rules.
+ */
+class Rules: public boost::noncopyable
+{
+public:
+ using Expression = ExpressionClasses::Expression;
+
+ Rules();
+
+ /// @returns a pointer to the first matching pattern and sets the match
+ /// groups accordingly.
+ std::pair<Pattern, std::function<Pattern()>> const* findFirstMatch(
+ Expression const& _expr,
+ ExpressionClasses const& _classes
+ );
+
+private:
+ void addRules(std::vector<std::pair<Pattern, std::function<Pattern()>>> const& _rules);
+ void addRule(std::pair<Pattern, std::function<Pattern()>> const& _rule);
+
+ void resetMatchGroups() { m_matchGroups.clear(); }
+
+ std::map<unsigned, Expression const*> m_matchGroups;
+ std::vector<std::pair<Pattern, std::function<Pattern()>>> m_rules[256];
+};
+
+/**
+ * Pattern to match against an expression.
+ * Also stores matched expressions to retrieve them later, for constructing new expressions using
+ * ExpressionTemplate.
+ */
+class Pattern
+{
+public:
+ using Expression = ExpressionClasses::Expression;
+ using Id = ExpressionClasses::Id;
+
+ // Matches a specific constant value.
+ Pattern(unsigned _value): Pattern(u256(_value)) {}
+ // Matches a specific constant value.
+ Pattern(u256 const& _value): m_type(Push), m_requireDataMatch(true), m_data(std::make_shared<u256>(_value)) {}
+ // Matches a specific assembly item type or anything if not given.
+ Pattern(AssemblyItemType _type = UndefinedItem): m_type(_type) {}
+ // Matches a given instruction with given arguments
+ Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments = {});
+ /// Sets this pattern to be part of the match group with the identifier @a _group.
+ /// Inside one rule, all patterns in the same match group have to match expressions from the
+ /// same expression equivalence class.
+ void setMatchGroup(unsigned _group, std::map<unsigned, Expression const*>& _matchGroups);
+ unsigned matchGroup() const { return m_matchGroup; }
+ bool matches(Expression const& _expr, ExpressionClasses const& _classes) const;
+
+ AssemblyItem toAssemblyItem(SourceLocation const& _location) const;
+ std::vector<Pattern> arguments() const { return m_arguments; }
+
+ /// @returns the id of the matched expression if this pattern is part of a match group.
+ Id id() const { return matchGroupValue().id; }
+ /// @returns the data of the matched expression if this pattern is part of a match group.
+ u256 const& d() const { return matchGroupValue().item->data(); }
+
+ std::string toString() const;
+
+ AssemblyItemType type() const { return m_type; }
+ Instruction instruction() const
+ {
+ assertThrow(type() == Operation, OptimizerException, "");
+ return m_instruction;
+ }
+
+private:
+ bool matchesBaseItem(AssemblyItem const* _item) const;
+ Expression const& matchGroupValue() const;
+ u256 const& data() const;
+
+ AssemblyItemType m_type;
+ bool m_requireDataMatch = false;
+ Instruction m_instruction; ///< Only valid if m_type is Operation
+ std::shared_ptr<u256> m_data; ///< Only valid if m_type is not Operation
+ std::vector<Pattern> m_arguments;
+ unsigned m_matchGroup = 0;
+ std::map<unsigned, Expression const*>* m_matchGroups = nullptr;
+};
+
+/**
+ * Template for a new expression that can be built from matched patterns.
+ */
+struct ExpressionTemplate
+{
+ using Expression = ExpressionClasses::Expression;
+ using Id = ExpressionClasses::Id;
+ explicit ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location);
+ std::string toString() const;
+ bool hasId = false;
+ /// Id of the matched expression, if available.
+ Id id = Id(-1);
+ // Otherwise, assembly item.
+ AssemblyItem item = UndefinedItem;
+ std::vector<ExpressionTemplate> arguments;
+};
+
+}
+}