aboutsummaryrefslogtreecommitdiffstats
path: root/CommonSubexpressionEliminator.h
blob: 6156bc81a1e2f9bdf1089e9bccdf3baf7257cd0a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
/*
    This file is part of cpp-ethereum.

    cpp-ethereum is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    cpp-ethereum is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * @file CommonSubexpressionEliminator.h
 * @author Christian <c@ethdev.com>
 * @date 2015
 * Optimizer step for common subexpression elimination and stack reorganisation.
 */

#pragma once

#include <vector>
#include <map>
#include <set>
#include <tuple>
#include <ostream>
#include <libdevcore/CommonIO.h>
#include <libdevcore/Exceptions.h>
#include <libevmasm/ExpressionClasses.h>
#include <libevmasm/SemanticInformation.h>

namespace dev
{
namespace eth
{

class AssemblyItem;
using AssemblyItems = std::vector<AssemblyItem>;

/**
 * Optimizer step that performs common subexpression elimination and stack reorganisation,
 * i.e. it tries to infer equality among expressions and compute the values of two expressions
 * known to be equal only once.
 *
 * The general workings are that for each assembly item that is fed into the eliminator, an
 * equivalence class is derived from the operation and the equivalence class of its arguments.
 * DUPi, SWAPi and some arithmetic instructions are used to infer equivalences while these
 * classes are determined.
 *
 * When the list of optimized items is requested, they are generated in a bottom-up fashion,
 * adding code for equivalence classes that were not yet computed.
 */
class CommonSubexpressionEliminator
{
public:
    using Id = ExpressionClasses::Id;
    struct StoreOperation
    {
        enum Target { Memory, Storage };
        StoreOperation(
            Target _target,
            Id _slot,
            unsigned _sequenceNumber,
            Id _expression
        ): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {}
        Target target;
        Id slot;
        unsigned sequenceNumber;
        Id expression;
    };

    /// Feeds AssemblyItems into the eliminator and @returns the iterator pointing at the first
    /// item that must be fed into a new instance of the eliminator.
    template <class _AssemblyItemIterator>
    _AssemblyItemIterator feedItems(_AssemblyItemIterator _iterator, _AssemblyItemIterator _end);

    /// @returns the resulting items after optimization.
    AssemblyItems getOptimizedItems();

    /// Streams debugging information to @a _out.
    std::ostream& stream(
        std::ostream& _out,
        std::map<int, Id> _initialStack = std::map<int, Id>(),
        std::map<int, Id> _targetStack = std::map<int, Id>()
    ) const;

private:
    /// Feeds the item into the system for analysis.
    void feedItem(AssemblyItem const& _item, bool _copyItem = false);

    /// Tries to optimize the item that breaks the basic block at the end.
    void optimizeBreakingItem();

    /// Simplifies the given item using
    /// Assigns a new equivalence class to the next sequence number of the given stack element.
    void setStackElement(int _stackHeight, Id _class);
    /// Swaps the given stack elements in their next sequence number.
    void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location);
    /// Retrieves the current equivalence class fo the given stack element (or generates a new
    /// one if it does not exist yet).
    Id stackElement(int _stackHeight, SourceLocation const& _location);
    /// @returns the equivalence class id of the special initial stack element at the given height
    /// (must not be positive).
    Id initialStackElement(int _stackHeight, SourceLocation const& _location);

    /// Increments the sequence number, deletes all storage information that might be overwritten
    /// and stores the new value at the given slot.
    void storeInStorage(Id _slot, Id _value, SourceLocation const& _location);
    /// Retrieves the current value at the given slot in storage or creates a new special sload class.
    Id loadFromStorage(Id _slot, SourceLocation const& _location);
    /// Increments the sequence number, deletes all memory information that might be overwritten
    /// and stores the new value at the given slot.
    void storeInMemory(Id _slot, Id _value, SourceLocation const& _location);
    /// Retrieves the current value at the given slot in memory or creates a new special mload class.
    Id loadFromMemory(Id _slot, SourceLocation const& _location);
    /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory.
    Id applySha3(Id _start, Id _length, SourceLocation const& _location);

    /// Current stack height, can be negative.
    int m_stackHeight = 0;
    /// Current stack layout, mapping stack height -> equivalence class
    std::map<int, Id> m_stackElements;
    /// Current sequence number, this is incremented with each modification to storage or memory.
    unsigned m_sequenceNumber = 1;
    /// Knowledge about storage content.
    std::map<Id, Id> m_storageContent;
    /// Knowledge about memory content. Keys are memory addresses, note that the values overlap
    /// and are not contained here if they are not completely known.
    std::map<Id, Id> m_memoryContent;
    /// Keeps record of all sha3 hashes that are computed.
    std::map<std::vector<Id>, Id> m_knownSha3Hashes;
    /// Keeps information about which storage or memory slots were written to at which sequence
    /// number with what instruction.
    std::vector<StoreOperation> m_storeOperations;
    /// Structure containing the classes of equivalent expressions.
    ExpressionClasses m_expressionClasses;

    /// The item that breaks the basic block, can be nullptr.
    /// It is usually appended to the block but can be optimized in some cases.
    AssemblyItem const* m_breakingItem = nullptr;
};

/**
 * Unit that generates code from current stack layout, target stack layout and information about
 * the equivalence classes.
 */
class CSECodeGenerator
{
public:
    using StoreOperation = CommonSubexpressionEliminator::StoreOperation;
    using StoreOperations = std::vector<StoreOperation>;
    using Id = ExpressionClasses::Id;

    /// Initializes the code generator with the given classes and store operations.
    /// The store operations have to be sorted by sequence number in ascending order.
    CSECodeGenerator(ExpressionClasses& _expressionClasses, StoreOperations const& _storeOperations);

    /// @returns the assembly items generated from the given requirements
    /// @param _initialStack current contents of the stack (up to stack height of zero)
    /// @param _targetStackContents final contents of the stack, by stack height relative to initial
    /// @note should only be called once on each object.
    AssemblyItems generateCode(
        std::map<int, Id> const& _initialStack,
        std::map<int, Id> const& _targetStackContents
    );

private:
    /// Recursively discovers all dependencies to @a m_requests.
    void addDependencies(Id _c);

    /// Produce code that generates the given element if it is not yet present.
    /// @returns the stack position of the element or c_invalidPosition if it does not actually
    /// generate a value on the stack.
    /// @param _allowSequenced indicates that sequence-constrained operations are allowed
    int generateClassElement(Id _c, bool _allowSequenced = false);
    /// @returns the position of the representative of the given id on the stack.
    /// @note throws an exception if it is not on the stack.
    int classElementPosition(Id _id) const;

    /// @returns true if @a _element can be removed - in general or, if given, while computing @a _result.
    bool canBeRemoved(Id _element, Id _result = Id(-1));

    /// Appends code to remove the topmost stack element if it can be removed.
    bool removeStackTopIfPossible();

    /// Appends a dup instruction to m_generatedItems to retrieve the element at the given stack position.
    void appendDup(int _fromPosition, SourceLocation const& _location);
    /// Appends a swap instruction to m_generatedItems to retrieve the element at the given stack position.
    /// @note this might also remove the last item if it exactly the same swap instruction.
    void appendOrRemoveSwap(int _fromPosition, SourceLocation const& _location);
    /// Appends the given assembly item.
    void appendItem(AssemblyItem const& _item);

    static const int c_invalidPosition = -0x7fffffff;

    AssemblyItems m_generatedItems;
    /// Current height of the stack relative to the start.
    int m_stackHeight = 0;
    /// If (b, a) is in m_requests then b is needed to compute a.
    std::multimap<Id, Id> m_neededBy;
    /// Current content of the stack.
    std::map<int, Id> m_stack;
    /// Current positions of equivalence classes, equal to c_invalidPosition if already deleted.
    std::map<Id, int> m_classPositions;

    /// The actual eqivalence class items and how to compute them.
    ExpressionClasses& m_expressionClasses;
    /// Keeps information about which storage or memory slots were written to by which operations.
    /// The operations are sorted ascendingly by sequence number.
    std::map<std::pair<StoreOperation::Target, Id>, StoreOperations> m_storeOperations;
    /// The set of equivalence classes that should be present on the stack at the end.
    std::set<Id> m_finalClasses;
};

template <class _AssemblyItemIterator>
_AssemblyItemIterator CommonSubexpressionEliminator::feedItems(
    _AssemblyItemIterator _iterator,
    _AssemblyItemIterator _end
)
{
    for (; _iterator != _end && !SemanticInformation::breaksCSEAnalysisBlock(*_iterator); ++_iterator)
        feedItem(*_iterator);
    if (_iterator != _end)
        m_breakingItem = &(*_iterator++);
    return _iterator;
}

}
}