diff options
author | Christian <c@ethdev.com> | 2014-10-30 08:20:32 +0800 |
---|---|---|
committer | Christian <c@ethdev.com> | 2014-10-30 08:25:42 +0800 |
commit | 7f19f3d133b74bd7ebc96d18b09e145417b7daac (patch) | |
tree | a344a8faf9675882eb42f4f83e57f3a825844dcf | |
parent | 51349bdae53e7d495732085c446ff9488473dcc8 (diff) | |
download | dexon-solidity-7f19f3d133b74bd7ebc96d18b09e145417b7daac.tar.gz dexon-solidity-7f19f3d133b74bd7ebc96d18b09e145417b7daac.tar.zst dexon-solidity-7f19f3d133b74bd7ebc96d18b09e145417b7daac.zip |
Contract compiler and also add ExpressionStatement to AST.
ExpressionStatement functions as glue between Statements and Expressions.
This way it is possible to detect when the border between statements and
expressions is crossed while walking the AST. Note that ExpressionStatement is
not the only border, almost every statement can contains expressions.
-rw-r--r-- | AST.cpp | 43 | ||||
-rw-r--r-- | AST.h | 65 | ||||
-rw-r--r-- | ASTForward.h | 1 | ||||
-rw-r--r-- | ASTPrinter.cpp | 12 | ||||
-rw-r--r-- | ASTPrinter.h | 2 | ||||
-rw-r--r-- | ASTVisitor.h | 2 | ||||
-rw-r--r-- | CMakeLists.txt | 4 | ||||
-rw-r--r-- | Compiler.cpp | 509 | ||||
-rw-r--r-- | Compiler.h | 141 | ||||
-rw-r--r-- | CompilerUtilities.cpp | 60 | ||||
-rw-r--r-- | CompilerUtilities.h | 83 | ||||
-rw-r--r-- | ExpressionCompiler.cpp | 408 | ||||
-rw-r--r-- | ExpressionCompiler.h | 79 | ||||
-rw-r--r-- | NameAndTypeResolver.cpp | 32 | ||||
-rw-r--r-- | NameAndTypeResolver.h | 17 | ||||
-rw-r--r-- | Parser.cpp | 21 | ||||
-rw-r--r-- | Parser.h | 1 | ||||
-rw-r--r-- | Types.cpp | 18 | ||||
-rw-r--r-- | Types.h | 6 |
19 files changed, 937 insertions, 567 deletions
@@ -167,6 +167,14 @@ void Return::accept(ASTVisitor& _visitor) _visitor.endVisit(*this); } +void ExpressionStatement::accept(ASTVisitor& _visitor) +{ + if (_visitor.visit(*this)) + if (m_expression) + m_expression->accept(_visitor); + _visitor.endVisit(*this); +} + void VariableDefinition::accept(ASTVisitor& _visitor) { if (_visitor.visit(*this)) @@ -255,14 +263,6 @@ TypeError ASTNode::createTypeError(string const& _description) return TypeError() << errinfo_sourceLocation(getLocation()) << errinfo_comment(_description); } -void Statement::expectType(Expression& _expression, const Type& _expectedType) -{ - _expression.checkTypeRequirements(); - if (!_expression.getType()->isImplicitlyConvertibleTo(_expectedType)) - BOOST_THROW_EXCEPTION(_expression.createTypeError("Type not implicitly convertible to expected type.")); - //@todo provide more information to the exception -} - void Block::checkTypeRequirements() { for (shared_ptr<Statement> const& statement: m_statements) @@ -271,7 +271,7 @@ void Block::checkTypeRequirements() void IfStatement::checkTypeRequirements() { - expectType(*m_condition, BoolType()); + m_condition->expectType(BoolType()); m_trueBody->checkTypeRequirements(); if (m_falseBody) m_falseBody->checkTypeRequirements(); @@ -279,7 +279,7 @@ void IfStatement::checkTypeRequirements() void WhileStatement::checkTypeRequirements() { - expectType(*m_condition, BoolType()); + m_condition->expectType(BoolType()); m_body->checkTypeRequirements(); } @@ -301,7 +301,7 @@ void Return::checkTypeRequirements() "than in returns declaration.")); // this could later be changed such that the paramaters type is an anonymous struct type, // but for now, we only allow one return parameter - expectType(*m_expression, *m_returnParameters->getParameters().front()->getType()); + m_expression->expectType(*m_returnParameters->getParameters().front()->getType()); } void VariableDefinition::checkTypeRequirements() @@ -313,7 +313,7 @@ void VariableDefinition::checkTypeRequirements() if (m_value) { if (m_variable->getType()) - expectType(*m_value, *m_variable->getType()); + m_value->expectType(*m_variable->getType()); else { // no type declared and no previous assignment, infer the type @@ -330,7 +330,7 @@ void Assignment::checkTypeRequirements() m_leftHandSide->checkTypeRequirements(); if (!m_leftHandSide->isLvalue()) BOOST_THROW_EXCEPTION(createTypeError("Expression has to be an lvalue.")); - expectType(*m_rightHandSide, *m_leftHandSide->getType()); + m_rightHandSide->expectType(*m_leftHandSide->getType()); m_type = m_leftHandSide->getType(); if (m_assigmentOperator != Token::ASSIGN) { @@ -340,6 +340,19 @@ void Assignment::checkTypeRequirements() } } +void ExpressionStatement::checkTypeRequirements() +{ + m_expression->checkTypeRequirements(); +} + +void Expression::expectType(const Type& _expectedType) +{ + checkTypeRequirements(); + if (!getType()->isImplicitlyConvertibleTo(_expectedType)) + BOOST_THROW_EXCEPTION(createTypeError("Type not implicitly convertible to expected type.")); + //@todo provide more information to the exception +} + void UnaryOperation::checkTypeRequirements() { // INC, DEC, ADD, SUB, NOT, BIT_NOT, DELETE @@ -411,10 +424,10 @@ void FunctionCall::checkTypeRequirements() BOOST_THROW_EXCEPTION(createTypeError("Invalid type for argument in function call.")); // @todo actually the return type should be an anonymous struct, // but we change it to the type of the first return value until we have structs - if (fun.getReturnParameterList()->getParameters().empty()) + if (fun.getReturnParameters().empty()) m_type = make_shared<VoidType>(); else - m_type = fun.getReturnParameterList()->getParameters().front()->getType(); + m_type = fun.getReturnParameters().front()->getType(); } } @@ -144,7 +144,7 @@ public: ASTNode(_location), m_parameters(_parameters) {} virtual void accept(ASTVisitor& _visitor) override; - std::vector<ASTPointer<VariableDeclaration>> const& getParameters() { return m_parameters; } + std::vector<ASTPointer<VariableDeclaration>> const& getParameters() const { return m_parameters; } private: std::vector<ASTPointer<VariableDeclaration>> m_parameters; @@ -167,15 +167,20 @@ public: bool isDeclaredConst() const { return m_isDeclaredConst; } std::vector<ASTPointer<VariableDeclaration>> const& getParameters() const { return m_parameters->getParameters(); } ParameterList& getParameterList() { return *m_parameters; } + std::vector<ASTPointer<VariableDeclaration>> const& getReturnParameters() const { return m_returnParameters->getParameters(); } ASTPointer<ParameterList> const& getReturnParameterList() const { return m_returnParameters; } Block& getBody() { return *m_body; } + void addLocalVariable(VariableDeclaration const& _localVariable) { m_localVariables.push_back(&_localVariable); } + std::vector<VariableDeclaration const*>const& getLocalVariables() const { return m_localVariables; } private: bool m_isPublic; ASTPointer<ParameterList> m_parameters; bool m_isDeclaredConst; ASTPointer<ParameterList> m_returnParameters; ASTPointer<Block> m_body; + + std::vector<VariableDeclaration const*> m_localVariables; }; /// Declaration of a variable. This can be used in various places, e.g. in function parameter @@ -285,11 +290,6 @@ public: //! This includes checking that operators are applicable to their arguments but also that //! the number of function call arguments matches the number of formal parameters and so forth. virtual void checkTypeRequirements() = 0; - -protected: - //! Helper function, check that the inferred type for @a _expression is @a _expectedType or at - //! least implicitly convertible to @a _expectedType. If not, throw exception. - void expectType(Expression& _expression, Type const& _expectedType); }; /// Brace-enclosed block containing zero or more statements. @@ -318,6 +318,9 @@ public: virtual void accept(ASTVisitor& _visitor) override; virtual void checkTypeRequirements() override; + Expression& getCondition() const { return *m_condition; } + Statement& getTrueStatement() const { return *m_trueBody; } + Statement* getFalseStatement() const { return m_falseBody.get(); } private: ASTPointer<Expression> m_condition; ASTPointer<Statement> m_trueBody; @@ -342,6 +345,8 @@ public: virtual void accept(ASTVisitor& _visitor) override; virtual void checkTypeRequirements() override; + Expression& getCondition() const { return *m_condition; } + Statement& getBody() const { return *m_body; } private: ASTPointer<Expression> m_condition; ASTPointer<Statement> m_body; @@ -372,6 +377,8 @@ public: virtual void checkTypeRequirements() override; void setFunctionReturnParameters(ParameterList& _parameters) { m_returnParameters = &_parameters; } + ParameterList const& getFunctionReturnParameters() const { assert(m_returnParameters); return *m_returnParameters; } + Expression* getExpression() const { return m_expression.get(); } private: ASTPointer<Expression> m_expression; //< value to return, optional @@ -392,21 +399,54 @@ public: virtual void accept(ASTVisitor& _visitor) override; virtual void checkTypeRequirements() override; + VariableDeclaration const& getDeclaration() const { return *m_variable; } + Expression* getExpression() const { return m_value.get(); } + private: ASTPointer<VariableDeclaration> m_variable; ASTPointer<Expression> m_value; ///< the assigned value, can be missing }; -/// An expression, i.e. something that has a value (which can also be of type "void" in case -/// of function calls). -class Expression: public Statement +/** + * A statement that contains only an expression (i.e. an assignment, function call, ...). + */ +class ExpressionStatement: public Statement +{ +public: + ExpressionStatement(Location const& _location, ASTPointer<Expression> _expression): + Statement(_location), m_expression(_expression) {} + virtual void accept(ASTVisitor& _visitor) override; + virtual void checkTypeRequirements() override; + + Expression& getExpression() const { return *m_expression; } + +private: + ASTPointer<Expression> m_expression; +}; + +/// @} + +/// Expressions +/// @{ + +/** + * An expression, i.e. something that has a value (which can also be of type "void" in case + * of some function calls). + * @abstract + */ +class Expression: public ASTNode { public: - Expression(Location const& _location): Statement(_location), m_isLvalue(false) {} + Expression(Location const& _location): ASTNode(_location), m_isLvalue(false) {} + virtual void checkTypeRequirements() = 0; std::shared_ptr<Type const> const& getType() const { return m_type; } bool isLvalue() const { return m_isLvalue; } + /// Helper function, infer the type via @ref checkTypeRequirements and then check that it + /// is implicitly convertible to @a _expectedType. If not, throw exception. + void expectType(Type const& _expectedType); + protected: //! Inferred type of the expression, only filled after a call to checkTypeRequirements(). std::shared_ptr<Type const> m_type; @@ -415,11 +455,6 @@ protected: bool m_isLvalue; }; -/// @} - -/// Expressions -/// @{ - /// Assignment, can also be a compound assignment. /// Examples: (a = 7 + 8) or (a *= 2) class Assignment: public Expression diff --git a/ASTForward.h b/ASTForward.h index c9a780f5..2b0bd886 100644 --- a/ASTForward.h +++ b/ASTForward.h @@ -53,6 +53,7 @@ class Continue; class Break; class Return; class VariableDefinition; +class ExpressionStatement; class Expression; class Assignment; class UnaryOperation; diff --git a/ASTPrinter.cpp b/ASTPrinter.cpp index 9b545ac9..eb9d92f0 100644 --- a/ASTPrinter.cpp +++ b/ASTPrinter.cpp @@ -171,6 +171,13 @@ bool ASTPrinter::visit(VariableDefinition& _node) return goDeeper(); } +bool ASTPrinter::visit(ExpressionStatement& _node) +{ + writeLine("ExpressionStatement"); + printSourcePart(_node); + return goDeeper(); +} + bool ASTPrinter::visit(Expression& _node) { writeLine("Expression"); @@ -358,6 +365,11 @@ void ASTPrinter::endVisit(VariableDefinition&) m_indentation--; } +void ASTPrinter::endVisit(ExpressionStatement&) +{ + m_indentation--; +} + void ASTPrinter::endVisit(Expression&) { m_indentation--; diff --git a/ASTPrinter.h b/ASTPrinter.h index 74e0837f..722c80c9 100644 --- a/ASTPrinter.h +++ b/ASTPrinter.h @@ -58,6 +58,7 @@ public: bool visit(Break& _node) override; bool visit(Return& _node) override; bool visit(VariableDefinition& _node) override; + bool visit(ExpressionStatement& _node) override; bool visit(Expression& _node) override; bool visit(Assignment& _node) override; bool visit(UnaryOperation& _node) override; @@ -89,6 +90,7 @@ public: void endVisit(Break&) override; void endVisit(Return&) override; void endVisit(VariableDefinition&) override; + void endVisit(ExpressionStatement&) override; void endVisit(Expression&) override; void endVisit(Assignment&) override; void endVisit(UnaryOperation&) override; diff --git a/ASTVisitor.h b/ASTVisitor.h index a667ad39..2a765e47 100644 --- a/ASTVisitor.h +++ b/ASTVisitor.h @@ -58,6 +58,7 @@ public: virtual bool visit(Break&) { return true; } virtual bool visit(Return&) { return true; } virtual bool visit(VariableDefinition&) { return true; } + virtual bool visit(ExpressionStatement&) { return true; } virtual bool visit(Expression&) { return true; } virtual bool visit(Assignment&) { return true; } virtual bool visit(UnaryOperation&) { return true; } @@ -89,6 +90,7 @@ public: virtual void endVisit(Break&) { } virtual void endVisit(Return&) { } virtual void endVisit(VariableDefinition&) { } + virtual void endVisit(ExpressionStatement&) { } virtual void endVisit(Expression&) { } virtual void endVisit(Assignment&) { } virtual void endVisit(UnaryOperation&) { } diff --git a/CMakeLists.txt b/CMakeLists.txt index 757d0cc0..f425bba4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,8 +16,8 @@ file(GLOB HEADERS "*.h") include_directories(..) -target_link_libraries(${EXECUTABLE} devcore) -target_link_libraries(${EXECUTABLE} evmface) +# @todo we only depend on Assembly, not on all of lll +target_link_libraries(${EXECUTABLE} evmface devcore lll) install( TARGETS ${EXECUTABLE} ARCHIVE DESTINATION lib LIBRARY DESTINATION lib ) install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} ) diff --git a/Compiler.cpp b/Compiler.cpp index dbb38324..fea88560 100644 --- a/Compiler.cpp +++ b/Compiler.cpp @@ -17,455 +17,192 @@ /** * @author Christian <c@ethdev.com> * @date 2014 - * Solidity AST to EVM bytecode compiler. + * Solidity compiler. */ -#include <cassert> -#include <utility> +#include <algorithm> #include <libsolidity/AST.h> #include <libsolidity/Compiler.h> +#include <libsolidity/ExpressionCompiler.h> +using namespace std; namespace dev { namespace solidity { - -void CompilerContext::setLabelPosition(uint32_t _label, uint32_t _position) -{ - assert(m_labelPositions.find(_label) == m_labelPositions.end()); - m_labelPositions[_label] = _position; -} - -uint32_t CompilerContext::getLabelPosition(uint32_t _label) const -{ - auto iter = m_labelPositions.find(_label); - assert(iter != m_labelPositions.end()); - return iter->second; -} - -void ExpressionCompiler::compile(Expression& _expression) +bytes Compiler::compile(ContractDefinition& _contract) { - m_assemblyItems.clear(); - _expression.accept(*this); + Compiler compiler; + compiler.compileContract(_contract); + return compiler.m_context.getAssembledBytecode(); } -bytes ExpressionCompiler::getAssembledBytecode() const +void Compiler::compileContract(ContractDefinition& _contract) { - bytes assembled; - assembled.reserve(m_assemblyItems.size()); + m_context = CompilerContext(); // clear it just in case - // resolve label references - for (uint32_t pos = 0; pos < m_assemblyItems.size(); ++pos) - { - AssemblyItem const& item = m_assemblyItems[pos]; - if (item.getType() == AssemblyItem::Type::LABEL) - m_context.setLabelPosition(item.getLabel(), pos + 1); - } + //@todo constructor + //@todo register state variables - for (AssemblyItem const& item: m_assemblyItems) - { - if (item.getType() == AssemblyItem::Type::LABELREF) - assembled.push_back(m_context.getLabelPosition(item.getLabel())); - else - assembled.push_back(item.getData()); - } - - return assembled; + for (ASTPointer<FunctionDefinition> const& function: _contract.getDefinedFunctions()) + m_context.addFunction(*function); + appendFunctionSelector(_contract.getDefinedFunctions()); + for (ASTPointer<FunctionDefinition> const& function: _contract.getDefinedFunctions()) + function->accept(*this); } -AssemblyItems ExpressionCompiler::compileExpression(CompilerContext& _context, - Expression& _expression) +void Compiler::appendFunctionSelector(std::vector<ASTPointer<FunctionDefinition>> const&) { - ExpressionCompiler compiler(_context); - compiler.compile(_expression); - return compiler.getAssemblyItems(); + // filter public functions, and sort by name. Then select function from first byte, + // unpack arguments from calldata, push to stack and jump. Pack return values to + // output and return. } -bool ExpressionCompiler::visit(Assignment& _assignment) +bool Compiler::visit(FunctionDefinition& _function) { - m_currentLValue = nullptr; - _assignment.getLeftHandSide().accept(*this); + //@todo to simplify this, the colling convention could by changed such that + // caller puts: [retarg0] ... [retargm] [return address] [arg0] ... [argn] + // although note that this reduces the size of the visible stack - Expression& rightHandSide = _assignment.getRightHandSide(); - Token::Value op = _assignment.getAssignmentOperator(); - if (op != Token::ASSIGN) - { - // compound assignment - rightHandSide.accept(*this); - Type const& resultType = *_assignment.getType(); - cleanHigherOrderBitsIfNeeded(*rightHandSide.getType(), resultType); - appendOrdinaryBinaryOperatorCode(Token::AssignmentToBinaryOp(op), resultType); - } - else - { - append(eth::Instruction::POP); //@todo do not retrieve the value in the first place - rightHandSide.accept(*this); - } + m_context.startNewFunction(); + m_returnTag = m_context.newTag(); + m_breakTags.clear(); + m_continueTags.clear(); - storeInLValue(_assignment); - return false; -} - -void ExpressionCompiler::endVisit(UnaryOperation& _unaryOperation) -{ - //@todo type checking and creating code for an operator should be in the same place: - // the operator should know how to convert itself and to which types it applies, so - // put this code together with "Type::acceptsBinary/UnaryOperator" into a class that - // represents the operator - switch (_unaryOperation.getOperator()) - { - case Token::NOT: // ! - append(eth::Instruction::NOT); - break; - case Token::BIT_NOT: // ~ - append(eth::Instruction::BNOT); - break; - case Token::DELETE: // delete - { - // a -> a xor a (= 0). - // @todo semantics change for complex types - append(eth::Instruction::DUP1); - append(eth::Instruction::XOR); - storeInLValue(_unaryOperation); - break; - } - case Token::INC: // ++ (pre- or postfix) - case Token::DEC: // -- (pre- or postfix) - if (!_unaryOperation.isPrefixOperation()) - append(eth::Instruction::DUP1); - append(eth::Instruction::PUSH1); - append(1); - if (_unaryOperation.getOperator() == Token::INC) - append(eth::Instruction::ADD); - else - { - append(eth::Instruction::SWAP1); //@todo avoid this - append(eth::Instruction::SUB); - } - if (_unaryOperation.isPrefixOperation()) - storeInLValue(_unaryOperation); - else - moveToLValue(_unaryOperation); - break; - case Token::ADD: // + - // unary add, so basically no-op - break; - case Token::SUB: // - - append(eth::Instruction::PUSH1); - append(0); - append(eth::Instruction::SUB); - break; - default: - assert(false); // invalid operation - } -} + m_context << m_context.getFunctionEntryLabel(_function); -bool ExpressionCompiler::visit(BinaryOperation& _binaryOperation) -{ - Expression& leftExpression = _binaryOperation.getLeftExpression(); - Expression& rightExpression = _binaryOperation.getRightExpression(); - Type const& resultType = *_binaryOperation.getType(); - Token::Value const op = _binaryOperation.getOperator(); + // stack upon entry: [return address] [arg0] [arg1] ... [argn] + // reserve additional slots: [retarg0] ... [retargm] [localvar0] ... [localvarp] - if (op == Token::AND || op == Token::OR) - { - // special case: short-circuiting - appendAndOrOperatorCode(_binaryOperation); - } - else if (Token::isCompareOp(op)) - { - leftExpression.accept(*this); - rightExpression.accept(*this); + unsigned const numArguments = _function.getParameters().size(); + unsigned const numReturnValues = _function.getReturnParameters().size(); + unsigned const numLocalVariables = _function.getLocalVariables().size(); - // the types to compare have to be the same, but the resulting type is always bool - assert(*leftExpression.getType() == *rightExpression.getType()); - appendCompareOperatorCode(op, *leftExpression.getType()); - } - else - { - leftExpression.accept(*this); - cleanHigherOrderBitsIfNeeded(*leftExpression.getType(), resultType); - rightExpression.accept(*this); - cleanHigherOrderBitsIfNeeded(*rightExpression.getType(), resultType); - appendOrdinaryBinaryOperatorCode(op, resultType); - } + for (ASTPointer<VariableDeclaration> const& variable: _function.getParameters() + _function.getReturnParameters()) + m_context.addVariable(*variable); + for (VariableDeclaration const* localVariable: _function.getLocalVariables()) + m_context.addVariable(*localVariable); + m_context.initializeLocalVariables(numReturnValues + numLocalVariables); - // do not visit the child nodes, we already did that explicitly - return false; -} + _function.getBody().accept(*this); -void ExpressionCompiler::endVisit(FunctionCall& _functionCall) -{ - if (_functionCall.isTypeConversion()) - { - //@todo we only have integers and bools for now which cannot be explicitly converted - assert(_functionCall.getArguments().size() == 1); - cleanHigherOrderBitsIfNeeded(*_functionCall.getArguments().front()->getType(), - *_functionCall.getType()); - } - else - { - //@todo: arguments are already on the stack - // push return label (below arguments?) - // jump to function label - // discard all but the first function return argument - } -} + m_context << m_returnTag; -void ExpressionCompiler::endVisit(MemberAccess&) -{ + // Now we need to re-shuffle the stack. For this we keep a record of the stack layout + // that shows the target positions of the elements, where "-1" denotes that this element needs + // to be removed from the stack. + // Note that the fact that the return arguments are of increasing index is vital for this + // algorithm to work. -} + vector<int> stackLayout; + stackLayout.push_back(numReturnValues); // target of return address + stackLayout += vector<int>(numArguments, -1); // discard all arguments + for (unsigned i = 0; i < numReturnValues; ++i) + stackLayout.push_back(i); + stackLayout += vector<int>(numLocalVariables, -1); -void ExpressionCompiler::endVisit(IndexAccess&) -{ + while (stackLayout.back() != int(stackLayout.size() - 1)) + if (stackLayout.back() < 0) + { + m_context << eth::Instruction::POP; + stackLayout.pop_back(); + } + else + { + m_context << eth::swapInstruction(stackLayout.size() - stackLayout.back() - 1); + swap(stackLayout[stackLayout.back()], stackLayout.back()); + } -} + m_context << eth::Instruction::JUMP; -void ExpressionCompiler::endVisit(Identifier& _identifier) -{ - m_currentLValue = _identifier.getReferencedDeclaration(); - unsigned stackPos = stackPositionOfLValue(); - if (stackPos >= 15) //@todo correct this by fetching earlier or moving to memory - BOOST_THROW_EXCEPTION(CompilerError() << errinfo_sourceLocation(_identifier.getLocation()) - << errinfo_comment("Stack too deep.")); - appendDup(stackPos + 1); + return false; } -void ExpressionCompiler::endVisit(Literal& _literal) +bool Compiler::visit(IfStatement& _ifStatement) { - switch (_literal.getType()->getCategory()) - { - case Type::Category::INTEGER: - case Type::Category::BOOL: - { - bytes value = _literal.getType()->literalToBigEndian(_literal); - assert(value.size() <= 32); - assert(!value.empty()); - appendPush(value.size()); - append(value); - break; - } - default: - assert(false); // @todo - } + ExpressionCompiler::compileExpression(m_context, _ifStatement.getCondition()); + eth::AssemblyItem trueTag = m_context.appendConditionalJump(); + if (_ifStatement.getFalseStatement()) + _ifStatement.getFalseStatement()->accept(*this); + eth::AssemblyItem endTag = m_context.appendJump(); + m_context << trueTag; + _ifStatement.getTrueStatement().accept(*this); + m_context << endTag; + return false; } -void ExpressionCompiler::cleanHigherOrderBitsIfNeeded(const Type& _typeOnStack, const Type& _targetType) +bool Compiler::visit(WhileStatement& _whileStatement) { - // If the type of one of the operands is extended, we need to remove all - // higher-order bits that we might have ignored in previous operations. - // @todo: store in the AST whether the operand might have "dirty" higher - // order bits - - if (_typeOnStack == _targetType) - return; - if (_typeOnStack.getCategory() == Type::Category::INTEGER && - _targetType.getCategory() == Type::Category::INTEGER) - { - //@todo - } - else - { - // If we get here, there is either an implementation missing to clean higher oder bits - // for non-integer types that are explicitly convertible or we got here in error. - assert(!_typeOnStack.isExplicitlyConvertibleTo(_targetType)); - assert(false); // these types should not be convertible. - } -} + eth::AssemblyItem loopStart = m_context.newTag(); + eth::AssemblyItem loopEnd = m_context.newTag(); + m_continueTags.push_back(loopStart); + m_breakTags.push_back(loopEnd); -void ExpressionCompiler::appendAndOrOperatorCode(BinaryOperation& _binaryOperation) -{ - Token::Value const op = _binaryOperation.getOperator(); - assert(op == Token::OR || op == Token::AND); - - _binaryOperation.getLeftExpression().accept(*this); - append(eth::Instruction::DUP1); - if (op == Token::AND) - append(eth::Instruction::NOT); - uint32_t endLabel = appendConditionalJump(); - _binaryOperation.getRightExpression().accept(*this); - appendLabel(endLabel); -} + m_context << loopStart; + ExpressionCompiler::compileExpression(m_context, _whileStatement.getCondition()); + m_context << eth::Instruction::NOT; + m_context.appendConditionalJumpTo(loopEnd); -void ExpressionCompiler::appendCompareOperatorCode(Token::Value _operator, Type const& _type) -{ - if (_operator == Token::EQ || _operator == Token::NE) - { - append(eth::Instruction::EQ); - if (_operator == Token::NE) - append(eth::Instruction::NOT); - } - else - { - IntegerType const* type = dynamic_cast<IntegerType const*>(&_type); - assert(type); - bool const isSigned = type->isSigned(); + _whileStatement.getBody().accept(*this); - // note that EVM opcodes compare like "stack[0] < stack[1]", - // but our left value is at stack[1], so everyhing is reversed. - switch (_operator) - { - case Token::GTE: - append(isSigned ? eth::Instruction::SGT : eth::Instruction::GT); - append(eth::Instruction::NOT); - break; - case Token::LTE: - append(isSigned ? eth::Instruction::SLT : eth::Instruction::LT); - append(eth::Instruction::NOT); - break; - case Token::GT: - append(isSigned ? eth::Instruction::SLT : eth::Instruction::LT); - break; - case Token::LT: - append(isSigned ? eth::Instruction::SGT : eth::Instruction::GT); - break; - default: - assert(false); - } - } -} + m_context.appendJumpTo(loopStart); + m_context << loopEnd; -void ExpressionCompiler::appendOrdinaryBinaryOperatorCode(Token::Value _operator, Type const& _type) -{ - if (Token::isArithmeticOp(_operator)) - appendArithmeticOperatorCode(_operator, _type); - else if (Token::isBitOp(_operator)) - appendBitOperatorCode(_operator); - else if (Token::isShiftOp(_operator)) - appendShiftOperatorCode(_operator); - else - assert(false); // unknown binary operator + m_continueTags.pop_back(); + m_breakTags.pop_back(); + return false; } -void ExpressionCompiler::appendArithmeticOperatorCode(Token::Value _operator, Type const& _type) +bool Compiler::visit(Continue&) { - IntegerType const* type = dynamic_cast<IntegerType const*>(&_type); - assert(type); - bool const isSigned = type->isSigned(); - - switch (_operator) - { - case Token::ADD: - append(eth::Instruction::ADD); - break; - case Token::SUB: - append(eth::Instruction::SWAP1); - append(eth::Instruction::SUB); - break; - case Token::MUL: - append(eth::Instruction::MUL); - break; - case Token::DIV: - append(isSigned ? eth::Instruction::SDIV : eth::Instruction::DIV); - break; - case Token::MOD: - append(isSigned ? eth::Instruction::SMOD : eth::Instruction::MOD); - break; - default: - assert(false); - } + assert(!m_continueTags.empty()); + m_context.appendJumpTo(m_continueTags.back()); + return false; } -void ExpressionCompiler::appendBitOperatorCode(Token::Value _operator) +bool Compiler::visit(Break&) { - switch (_operator) - { - case Token::BIT_OR: - append(eth::Instruction::OR); - break; - case Token::BIT_AND: - append(eth::Instruction::AND); - break; - case Token::BIT_XOR: - append(eth::Instruction::XOR); - break; - default: - assert(false); - } + assert(!m_breakTags.empty()); + m_context.appendJumpTo(m_breakTags.back()); + return false; } -void ExpressionCompiler::appendShiftOperatorCode(Token::Value _operator) +bool Compiler::visit(Return& _return) { - switch (_operator) + //@todo modifications are needed to make this work with functions returning multiple values + if (Expression* expression = _return.getExpression()) { - case Token::SHL: - assert(false); //@todo - break; - case Token::SAR: - assert(false); //@todo - break; - default: - assert(false); + ExpressionCompiler::compileExpression(m_context, *expression); + VariableDeclaration const& firstVariable = *_return.getFunctionReturnParameters().getParameters().front(); + ExpressionCompiler::cleanHigherOrderBitsIfNeeded(*expression->getType(), *firstVariable.getType()); + int stackPosition = m_context.getStackPositionOfVariable(firstVariable); + m_context << eth::swapInstruction(stackPosition) << eth::Instruction::POP; } + m_context.appendJumpTo(m_returnTag); + return false; } -uint32_t ExpressionCompiler::appendConditionalJump() -{ - uint32_t label = m_context.dispenseNewLabel(); - append(eth::Instruction::PUSH1); - appendLabelref(label); - append(eth::Instruction::JUMPI); - return label; -} - -void ExpressionCompiler::appendPush(unsigned _number) -{ - assert(1 <= _number && _number <= 32); - append(eth::Instruction(unsigned(eth::Instruction::PUSH1) + _number - 1)); -} - -void ExpressionCompiler::appendDup(unsigned _number) -{ - assert(1 <= _number && _number <= 16); - append(eth::Instruction(unsigned(eth::Instruction::DUP1) + _number - 1)); -} - -void ExpressionCompiler::appendSwap(unsigned _number) -{ - assert(1 <= _number && _number <= 16); - append(eth::Instruction(unsigned(eth::Instruction::SWAP1) + _number - 1)); -} - -void ExpressionCompiler::append(bytes const& _data) -{ - m_assemblyItems.reserve(m_assemblyItems.size() + _data.size()); - for (byte b: _data) - append(b); -} - -void ExpressionCompiler::storeInLValue(Expression const& _expression) -{ - assert(m_currentLValue); - moveToLValue(_expression); - unsigned stackPos = stackPositionOfLValue(); - if (stackPos > 16) - BOOST_THROW_EXCEPTION(CompilerError() << errinfo_sourceLocation(_expression.getLocation()) - << errinfo_comment("Stack too deep.")); - if (stackPos >= 1) - appendDup(stackPos); -} - -void ExpressionCompiler::moveToLValue(Expression const& _expression) +bool Compiler::visit(VariableDefinition& _variableDefinition) { - assert(m_currentLValue); - unsigned stackPos = stackPositionOfLValue(); - if (stackPos > 16) - BOOST_THROW_EXCEPTION(CompilerError() << errinfo_sourceLocation(_expression.getLocation()) - << errinfo_comment("Stack too deep.")); - else if (stackPos > 0) + if (Expression* expression = _variableDefinition.getExpression()) { - appendSwap(stackPos); - append(eth::Instruction::POP); + ExpressionCompiler::compileExpression(m_context, *expression); + ExpressionCompiler::cleanHigherOrderBitsIfNeeded(*expression->getType(), + *_variableDefinition.getDeclaration().getType()); + int stackPosition = m_context.getStackPositionOfVariable(_variableDefinition.getDeclaration()); + m_context << eth::swapInstruction(stackPosition) << eth::Instruction::POP; } + return false; } -unsigned ExpressionCompiler::stackPositionOfLValue() const +bool Compiler::visit(ExpressionStatement& _expressionStatement) { - return 8; // @todo ask the context and track stack changes due to m_assemblyItems + Expression& expression = _expressionStatement.getExpression(); + ExpressionCompiler::compileExpression(m_context, expression); + if (expression.getType()->getCategory() != Type::Category::VOID) + m_context << eth::Instruction::POP; + return false; } - - } } @@ -20,133 +20,40 @@ * Solidity AST to EVM bytecode compiler. */ -#include <libevmface/Instruction.h> #include <libsolidity/ASTVisitor.h> -#include <libsolidity/Types.h> -#include <libsolidity/Token.h> +#include <libsolidity/CompilerUtilities.h> namespace dev { namespace solidity { -/// A single item of compiled code that can be assembled to a single byte value in the final -/// bytecode. Its main purpose is to inject jump labels and label references into the opcode stream, -/// which can be resolved in the final step. -class AssemblyItem +class Compiler: private ASTVisitor { public: - enum class Type - { - CODE, //< m_data is opcode, m_label is empty. - DATA, //< m_data is actual data, m_label is empty - LABEL, //< m_data is JUMPDEST opcode, m_label is id of label - LABELREF //< m_data is empty, m_label is id of label - }; - - explicit AssemblyItem(eth::Instruction _instruction) : m_type(Type::CODE), m_data(byte(_instruction)) {} - explicit AssemblyItem(byte _data): m_type(Type::DATA), m_data(_data) {} - - /// Factory functions - static AssemblyItem labelRef(uint32_t _label) { return AssemblyItem(Type::LABELREF, 0, _label); } - static AssemblyItem label(uint32_t _label) { return AssemblyItem(Type::LABEL, byte(eth::Instruction::JUMPDEST), _label); } - - Type getType() const { return m_type; } - byte getData() const { return m_data; } - uint32_t getLabel() const { return m_label; } + /// Compile the given contract and return the EVM bytecode. + static bytes compile(ContractDefinition& _contract); private: - AssemblyItem(Type _type, byte _data, uint32_t _label): m_type(_type), m_data(_data), m_label(_label) {} - - Type m_type; - byte m_data; //< data to be written to the bytecode stream (or filled by a label if this is a LABELREF) - uint32_t m_label; //< the id of a label either referenced or defined by this item + Compiler(): m_returnTag(m_context.newTag()) {} + + void compileContract(ContractDefinition& _contract); + void appendFunctionSelector(const std::vector<ASTPointer<FunctionDefinition> >& _functions); + + virtual bool visit(FunctionDefinition& _function) override; + virtual bool visit(IfStatement& _ifStatement) override; + virtual bool visit(WhileStatement& _whileStatement) override; + virtual bool visit(Continue& _continue) override; + virtual bool visit(Break& _break) override; + virtual bool visit(Return& _return) override; + virtual bool visit(VariableDefinition& _variableDefinition) override; + virtual bool visit(ExpressionStatement& _expressionStatement) override; + + bytes getAssembledBytecode() { return m_context.getAssembledBytecode(); } + + CompilerContext m_context; + std::vector<eth::AssemblyItem> m_breakTags; ///< tag to jump to for a "break" statement + std::vector<eth::AssemblyItem> m_continueTags; ///< tag to jump to for a "continue" statement + eth::AssemblyItem m_returnTag; ///< tag to jump to for a "return" statement }; -using AssemblyItems = std::vector<AssemblyItem>; - - -/// Context to be shared by all units that compile the same contract. Its current usage only -/// concerns dispensing unique jump label IDs and storing their actual positions in the bytecode -/// stream. -class CompilerContext -{ -public: - CompilerContext(): m_nextLabel(0) {} - uint32_t dispenseNewLabel() { return m_nextLabel++; } - void setLabelPosition(uint32_t _label, uint32_t _position); - uint32_t getLabelPosition(uint32_t _label) const; - -private: - uint32_t m_nextLabel; - - std::map<uint32_t, uint32_t> m_labelPositions; -}; - -/// Compiler for expressions, i.e. converts an AST tree whose root is an Expression into a stream -/// of EVM instructions. It needs a compiler context that is the same for the whole compilation -/// unit. -class ExpressionCompiler: public ASTVisitor -{ -public: - ExpressionCompiler(CompilerContext& _compilerContext): m_currentLValue(nullptr), m_context(_compilerContext) {} - - /// Compile the given expression and (re-)populate the assembly item list. - void compile(Expression& _expression); - AssemblyItems const& getAssemblyItems() const { return m_assemblyItems; } - bytes getAssembledBytecode() const; - - /// Compile the given expression and return the assembly items right away. - static AssemblyItems compileExpression(CompilerContext& _context, Expression& _expression); - -private: - virtual bool visit(Assignment& _assignment) override; - virtual void endVisit(UnaryOperation& _unaryOperation) override; - virtual bool visit(BinaryOperation& _binaryOperation) override; - virtual void endVisit(FunctionCall& _functionCall) override; - virtual void endVisit(MemberAccess& _memberAccess) override; - virtual void endVisit(IndexAccess& _indexAccess) override; - virtual void endVisit(Identifier& _identifier) override; - virtual void endVisit(Literal& _literal) override; - - /// Appends code to remove dirty higher order bits in case of an implicit promotion to a wider type. - void cleanHigherOrderBitsIfNeeded(Type const& _typeOnStack, Type const& _targetType); - - ///@{ - ///@name Append code for various operator types - void appendAndOrOperatorCode(BinaryOperation& _binaryOperation); - void appendCompareOperatorCode(Token::Value _operator, Type const& _type); - void appendOrdinaryBinaryOperatorCode(Token::Value _operator, Type const& _type); - - void appendArithmeticOperatorCode(Token::Value _operator, Type const& _type); - void appendBitOperatorCode(Token::Value _operator); - void appendShiftOperatorCode(Token::Value _operator); - /// @} - - /// Appends a JUMPI instruction to a new label and returns the label - uint32_t appendConditionalJump(); - void appendPush(unsigned _number); - void appendDup(unsigned _number); - void appendSwap(unsigned _number); - - /// Append elements to the current instruction list. - void append(eth::Instruction const& _instruction) { m_assemblyItems.push_back(AssemblyItem(_instruction)); } - void append(byte _value) { m_assemblyItems.push_back(AssemblyItem(_value)); } - void append(bytes const& _data); - void appendLabelref(byte _label) { m_assemblyItems.push_back(AssemblyItem::labelRef(_label)); } - void appendLabel(byte _label) { m_assemblyItems.push_back(AssemblyItem::label(_label)); } - - /// Stores the value on top of the stack in the current lvalue and copies that value to the - /// top of the stack again - void storeInLValue(Expression const& _expression); - /// The same as storeInLValue but do not again retrieve the value to the top of the stack. - void moveToLValue(Expression const& _expression); - /// Returns the position of @a m_currentLValue in the stack, where 0 is the top of the stack. - unsigned stackPositionOfLValue() const; - - Declaration* m_currentLValue; - AssemblyItems m_assemblyItems; - CompilerContext& m_context; -}; - - } } diff --git a/CompilerUtilities.cpp b/CompilerUtilities.cpp new file mode 100644 index 00000000..b8f57618 --- /dev/null +++ b/CompilerUtilities.cpp @@ -0,0 +1,60 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @author Christian <c@ethdev.com> + * @date 2014 + * Utilities for the solidity compiler. + */ + +#include <cassert> +#include <utility> +#include <numeric> +#include <libsolidity/AST.h> +#include <libsolidity/Compiler.h> + +using namespace std; + +namespace dev { +namespace solidity { + +void CompilerContext::initializeLocalVariables(unsigned _numVariables) +{ + if (_numVariables > 0) + { + *this << u256(0); + for (unsigned i = 1; i < _numVariables; ++i) + *this << eth::Instruction::DUP1; + m_asm.adjustDeposit(-_numVariables); + } +} + +int CompilerContext::getStackPositionOfVariable(const Declaration& _declaration) +{ + auto res = find(begin(m_localVariables), end(m_localVariables), &_declaration); + assert(res != m_localVariables.end()); + return end(m_localVariables) - res - 1 + m_asm.deposit(); +} + +eth::AssemblyItem CompilerContext::getFunctionEntryLabel(const FunctionDefinition& _function) const +{ + auto res = m_functionEntryLabels.find(&_function); + assert(res != m_functionEntryLabels.end()); + return res->second.tag(); +} + +} +} diff --git a/CompilerUtilities.h b/CompilerUtilities.h new file mode 100644 index 00000000..90367903 --- /dev/null +++ b/CompilerUtilities.h @@ -0,0 +1,83 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @author Christian <c@ethdev.com> + * @date 2014 + * Utilities for the solidity compiler. + */ + +#pragma once + +#include <libevmface/Instruction.h> +#include <liblll/Assembly.h> +#include <libsolidity/Types.h> + +namespace dev { +namespace solidity { + + +/** + * Context to be shared by all units that compile the same contract. + * It stores the generated bytecode and the position of identifiers in memory and on the stack. + */ +class CompilerContext +{ +public: + CompilerContext() {} + + void startNewFunction() { m_localVariables.clear(); m_asm.setDeposit(0); } + void initializeLocalVariables(unsigned _numVariables); + void addVariable(VariableDeclaration const& _declaration) { m_localVariables.push_back(&_declaration); } + /// Returns the distance of the given local variable from the top of the stack. + int getStackPositionOfVariable(Declaration const& _declaration); + + void addFunction(FunctionDefinition const& _function) { m_functionEntryLabels.insert(std::make_pair(&_function, m_asm.newTag())); } + eth::AssemblyItem getFunctionEntryLabel(FunctionDefinition const& _function) const; + + void adjustStackOffset(int _adjustment) { m_asm.adjustDeposit(_adjustment); } + + /// Appends a JUMPI instruction to a new tag and @returns the tag + eth::AssemblyItem appendConditionalJump() { return m_asm.appendJumpI().tag(); } + /// Appends a JUMPI instruction to @a _tag + CompilerContext& appendConditionalJumpTo(eth::AssemblyItem const& _tag) { m_asm.appendJumpI(_tag); return *this; } + /// Appends a JUMP to a new tag and @returns the tag + eth::AssemblyItem appendJump() { return m_asm.appendJump().tag(); } + /// Appends a JUMP to a specific tag + CompilerContext& appendJumpTo(eth::AssemblyItem const& _tag) { m_asm.appendJump(_tag); return *this; } + /// Appends pushing of a new tag and @returns the new tag. + eth::AssemblyItem pushNewTag() { return m_asm.append(m_asm.newPushTag()).tag(); } + /// @returns a new tag without pushing any opcodes or data + eth::AssemblyItem newTag() { return m_asm.newTag(); } + + /// Append elements to the current instruction list and adjust @a m_stackOffset. + CompilerContext& operator<<(eth::AssemblyItem const& _item) { m_asm.append(_item); return *this; } + CompilerContext& operator<<(eth::Instruction _instruction) { m_asm.append(_instruction); return *this; } + CompilerContext& operator<<(u256 const& _value) { m_asm.append(_value); return *this; } + CompilerContext& operator<<(bytes const& _data) { m_asm.append(_data); return *this; } + + bytes getAssembledBytecode() { return m_asm.assemble(); } +private: + eth::Assembly m_asm; + + /// Offsets of local variables on the stack. + std::vector<Declaration const*> m_localVariables; + /// Labels pointing to the entry points of funcitons. + std::map<FunctionDefinition const*, eth::AssemblyItem> m_functionEntryLabels; +}; + +} +} diff --git a/ExpressionCompiler.cpp b/ExpressionCompiler.cpp new file mode 100644 index 00000000..76fcc298 --- /dev/null +++ b/ExpressionCompiler.cpp @@ -0,0 +1,408 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @author Christian <c@ethdev.com> + * @date 2014 + * Solidity AST to EVM bytecode compiler for expressions. + */ + +#include <cassert> +#include <utility> +#include <numeric> +#include <libsolidity/AST.h> +#include <libsolidity/ExpressionCompiler.h> + +using namespace std; + +namespace dev { +namespace solidity { + +void ExpressionCompiler::compileExpression(CompilerContext& _context, Expression& _expression) +{ + ExpressionCompiler compiler(_context); + _expression.accept(compiler); +} + +bool ExpressionCompiler::visit(Assignment& _assignment) +{ + m_currentLValue = nullptr; + + Expression& rightHandSide = _assignment.getRightHandSide(); + rightHandSide.accept(*this); + Type const& resultType = *_assignment.getType(); + cleanHigherOrderBitsIfNeeded(*rightHandSide.getType(), resultType); + _assignment.getLeftHandSide().accept(*this); + + Token::Value op = _assignment.getAssignmentOperator(); + if (op != Token::ASSIGN) + { + // compound assignment + m_context << eth::Instruction::SWAP1; + appendOrdinaryBinaryOperatorCode(Token::AssignmentToBinaryOp(op), resultType); + } + else + m_context << eth::Instruction::POP; //@todo do not retrieve the value in the first place + + storeInLValue(_assignment); + return false; +} + +void ExpressionCompiler::endVisit(UnaryOperation& _unaryOperation) +{ + //@todo type checking and creating code for an operator should be in the same place: + // the operator should know how to convert itself and to which types it applies, so + // put this code together with "Type::acceptsBinary/UnaryOperator" into a class that + // represents the operator + switch (_unaryOperation.getOperator()) + { + case Token::NOT: // ! + m_context << eth::Instruction::NOT; + break; + case Token::BIT_NOT: // ~ + m_context << eth::Instruction::BNOT; + break; + case Token::DELETE: // delete + { + // a -> a xor a (= 0). + // @todo semantics change for complex types + m_context << eth::Instruction::DUP1 << eth::Instruction::XOR; + storeInLValue(_unaryOperation); + break; + } + case Token::INC: // ++ (pre- or postfix) + case Token::DEC: // -- (pre- or postfix) + if (!_unaryOperation.isPrefixOperation()) + m_context << eth::Instruction::DUP1; + m_context << u256(1); + if (_unaryOperation.getOperator() == Token::INC) + m_context << eth::Instruction::ADD; + else + m_context << eth::Instruction::SWAP1 << eth::Instruction::SUB; // @todo avoid the swap + if (_unaryOperation.isPrefixOperation()) + storeInLValue(_unaryOperation); + else + moveToLValue(_unaryOperation); + break; + case Token::ADD: // + + // unary add, so basically no-op + break; + case Token::SUB: // - + m_context << u256(0) << eth::Instruction::SUB; + break; + default: + assert(false); // invalid operation + } +} + +bool ExpressionCompiler::visit(BinaryOperation& _binaryOperation) +{ + Expression& leftExpression = _binaryOperation.getLeftExpression(); + Expression& rightExpression = _binaryOperation.getRightExpression(); + Type const& resultType = *_binaryOperation.getType(); + Token::Value const op = _binaryOperation.getOperator(); + + if (op == Token::AND || op == Token::OR) + { + // special case: short-circuiting + appendAndOrOperatorCode(_binaryOperation); + } + else if (Token::isCompareOp(op)) + { + leftExpression.accept(*this); + rightExpression.accept(*this); + + // the types to compare have to be the same, but the resulting type is always bool + assert(*leftExpression.getType() == *rightExpression.getType()); + appendCompareOperatorCode(op, *leftExpression.getType()); + } + else + { + leftExpression.accept(*this); + cleanHigherOrderBitsIfNeeded(*leftExpression.getType(), resultType); + rightExpression.accept(*this); + cleanHigherOrderBitsIfNeeded(*rightExpression.getType(), resultType); + appendOrdinaryBinaryOperatorCode(op, resultType); + } + + // do not visit the child nodes, we already did that explicitly + return false; +} + +bool ExpressionCompiler::visit(FunctionCall& _functionCall) +{ + if (_functionCall.isTypeConversion()) + { + //@todo we only have integers and bools for now which cannot be explicitly converted + assert(_functionCall.getArguments().size() == 1); + Expression& firstArgument = *_functionCall.getArguments().front(); + firstArgument.accept(*this); + cleanHigherOrderBitsIfNeeded(*firstArgument.getType(), *_functionCall.getType()); + } + else + { + // Calling convention: Caller pushes return address and arguments + // Callee removes them and pushes return values + m_currentLValue = nullptr; + _functionCall.getExpression().accept(*this); + FunctionDefinition const* function = dynamic_cast<FunctionDefinition*>(m_currentLValue); + assert(function); + + eth::AssemblyItem returnLabel = m_context.pushNewTag(); + std::vector<ASTPointer<Expression>> const& arguments = _functionCall.getArguments(); + assert(arguments.size() == function->getParameters().size()); + for (unsigned i = 0; i < arguments.size(); ++i) + { + arguments[i]->accept(*this); + cleanHigherOrderBitsIfNeeded(*arguments[i]->getType(), + *function->getParameters()[i]->getType()); + } + + m_context.appendJumpTo(m_context.getFunctionEntryLabel(*function)); + m_context << returnLabel; + + // callee adds return parameters, but removes arguments and return label + m_context.adjustStackOffset(function->getReturnParameters().size() - arguments.size() - 1); + + // @todo for now, the return value of a function is its first return value, so remove + // all others + for (unsigned i = 1; i < function->getReturnParameters().size(); ++i) + m_context << eth::Instruction::POP; + } + return false; +} + +void ExpressionCompiler::endVisit(MemberAccess&) +{ + +} + +void ExpressionCompiler::endVisit(IndexAccess&) +{ + +} + +void ExpressionCompiler::endVisit(Identifier& _identifier) +{ + m_currentLValue = _identifier.getReferencedDeclaration(); + switch (_identifier.getType()->getCategory()) + { + case Type::Category::BOOL: + case Type::Category::INTEGER: + case Type::Category::REAL: + { + //@todo we also have to check where to retrieve them from once we add storage variables + unsigned stackPos = stackPositionOfLValue(); + if (stackPos >= 15) //@todo correct this by fetching earlier or moving to memory + BOOST_THROW_EXCEPTION(CompilerError() << errinfo_sourceLocation(_identifier.getLocation()) + << errinfo_comment("Stack too deep.")); + m_context << eth::dupInstruction(stackPos + 1); + break; + } + default: + break; + } +} + +void ExpressionCompiler::endVisit(Literal& _literal) +{ + switch (_literal.getType()->getCategory()) + { + case Type::Category::INTEGER: + case Type::Category::BOOL: + m_context << _literal.getType()->literalValue(_literal); + break; + default: + assert(false); // @todo + } +} + +void ExpressionCompiler::cleanHigherOrderBitsIfNeeded(Type const& _typeOnStack, Type const& _targetType) +{ + // If the type of one of the operands is extended, we need to remove all + // higher-order bits that we might have ignored in previous operations. + // @todo: store in the AST whether the operand might have "dirty" higher + // order bits + + if (_typeOnStack == _targetType) + return; + if (_typeOnStack.getCategory() == Type::Category::INTEGER && + _targetType.getCategory() == Type::Category::INTEGER) + { + //@todo + } + else + { + // If we get here, there is either an implementation missing to clean higher oder bits + // for non-integer types that are explicitly convertible or we got here in error. + assert(!_typeOnStack.isExplicitlyConvertibleTo(_targetType)); + assert(false); // these types should not be convertible. + } +} + +void ExpressionCompiler::appendAndOrOperatorCode(BinaryOperation& _binaryOperation) +{ + Token::Value const op = _binaryOperation.getOperator(); + assert(op == Token::OR || op == Token::AND); + + _binaryOperation.getLeftExpression().accept(*this); + m_context << eth::Instruction::DUP1; + if (op == Token::AND) + m_context << eth::Instruction::NOT; + eth::AssemblyItem endLabel = m_context.appendConditionalJump(); + _binaryOperation.getRightExpression().accept(*this); + m_context << endLabel; +} + +void ExpressionCompiler::appendCompareOperatorCode(Token::Value _operator, Type const& _type) +{ + if (_operator == Token::EQ || _operator == Token::NE) + { + m_context << eth::Instruction::EQ; + if (_operator == Token::NE) + m_context << eth::Instruction::NOT; + } + else + { + IntegerType const* type = dynamic_cast<IntegerType const*>(&_type); + assert(type); + bool const isSigned = type->isSigned(); + + // note that EVM opcodes compare like "stack[0] < stack[1]", + // but our left value is at stack[1], so everyhing is reversed. + switch (_operator) + { + case Token::GTE: + m_context << (isSigned ? eth::Instruction::SGT : eth::Instruction::GT) + << eth::Instruction::NOT; + break; + case Token::LTE: + m_context << (isSigned ? eth::Instruction::SLT : eth::Instruction::LT) + << eth::Instruction::NOT; + break; + case Token::GT: + m_context << (isSigned ? eth::Instruction::SLT : eth::Instruction::LT); + break; + case Token::LT: + m_context << (isSigned ? eth::Instruction::SGT : eth::Instruction::GT); + break; + default: + assert(false); + } + } +} + +void ExpressionCompiler::appendOrdinaryBinaryOperatorCode(Token::Value _operator, Type const& _type) +{ + if (Token::isArithmeticOp(_operator)) + appendArithmeticOperatorCode(_operator, _type); + else if (Token::isBitOp(_operator)) + appendBitOperatorCode(_operator); + else if (Token::isShiftOp(_operator)) + appendShiftOperatorCode(_operator); + else + assert(false); // unknown binary operator +} + +void ExpressionCompiler::appendArithmeticOperatorCode(Token::Value _operator, Type const& _type) +{ + IntegerType const* type = dynamic_cast<IntegerType const*>(&_type); + assert(type); + bool const isSigned = type->isSigned(); + + switch (_operator) + { + case Token::ADD: + m_context << eth::Instruction::ADD; + break; + case Token::SUB: + m_context << eth::Instruction::SWAP1 << eth::Instruction::SUB; + break; + case Token::MUL: + m_context << eth::Instruction::MUL; + break; + case Token::DIV: + m_context << (isSigned ? eth::Instruction::SDIV : eth::Instruction::DIV); + break; + case Token::MOD: + m_context << (isSigned ? eth::Instruction::SMOD : eth::Instruction::MOD); + break; + default: + assert(false); + } +} + +void ExpressionCompiler::appendBitOperatorCode(Token::Value _operator) +{ + switch (_operator) + { + case Token::BIT_OR: + m_context << eth::Instruction::OR; + break; + case Token::BIT_AND: + m_context << eth::Instruction::AND; + break; + case Token::BIT_XOR: + m_context << eth::Instruction::XOR; + break; + default: + assert(false); + } +} + +void ExpressionCompiler::appendShiftOperatorCode(Token::Value _operator) +{ + switch (_operator) + { + case Token::SHL: + assert(false); //@todo + break; + case Token::SAR: + assert(false); //@todo + break; + default: + assert(false); + } +} + +void ExpressionCompiler::storeInLValue(Expression const& _expression) +{ + moveToLValue(_expression); + unsigned stackPos = stackPositionOfLValue(); + if (stackPos > 16) + BOOST_THROW_EXCEPTION(CompilerError() << errinfo_sourceLocation(_expression.getLocation()) + << errinfo_comment("Stack too deep.")); + m_context << eth::dupInstruction(stackPos + 1); +} + +void ExpressionCompiler::moveToLValue(Expression const& _expression) +{ + unsigned stackPos = stackPositionOfLValue(); + if (stackPos > 16) + BOOST_THROW_EXCEPTION(CompilerError() << errinfo_sourceLocation(_expression.getLocation()) + << errinfo_comment("Stack too deep.")); + else if (stackPos > 0) + m_context << eth::swapInstruction(stackPos) << eth::Instruction::POP; +} + +unsigned ExpressionCompiler::stackPositionOfLValue() const +{ + assert(m_currentLValue); + return m_context.getStackPositionOfVariable(*m_currentLValue); +} + +} +} diff --git a/ExpressionCompiler.h b/ExpressionCompiler.h new file mode 100644 index 00000000..28a04f52 --- /dev/null +++ b/ExpressionCompiler.h @@ -0,0 +1,79 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @author Christian <c@ethdev.com> + * @date 2014 + * Solidity AST to EVM bytecode compiler for expressions. + */ + +#include <libsolidity/ASTVisitor.h> +#include <libsolidity/CompilerUtilities.h> + +namespace dev { +namespace solidity { + +/// Compiler for expressions, i.e. converts an AST tree whose root is an Expression into a stream +/// of EVM instructions. It needs a compiler context that is the same for the whole compilation +/// unit. +class ExpressionCompiler: private ASTVisitor +{ +public: + /// Compile the given @a _expression into the @a _context. + static void compileExpression(CompilerContext& _context, Expression& _expression); + + /// Appends code to remove dirty higher order bits in case of an implicit promotion to a wider type. + static void cleanHigherOrderBitsIfNeeded(Type const& _typeOnStack, Type const& _targetType); + +private: + ExpressionCompiler(CompilerContext& _compilerContext): m_currentLValue(nullptr), m_context(_compilerContext) {} + + virtual bool visit(Assignment& _assignment) override; + virtual void endVisit(UnaryOperation& _unaryOperation) override; + virtual bool visit(BinaryOperation& _binaryOperation) override; + virtual bool visit(FunctionCall& _functionCall) override; + virtual void endVisit(MemberAccess& _memberAccess) override; + virtual void endVisit(IndexAccess& _indexAccess) override; + virtual void endVisit(Identifier& _identifier) override; + virtual void endVisit(Literal& _literal) override; + + ///@{ + ///@name Append code for various operator types + void appendAndOrOperatorCode(BinaryOperation& _binaryOperation); + void appendCompareOperatorCode(Token::Value _operator, Type const& _type); + void appendOrdinaryBinaryOperatorCode(Token::Value _operator, Type const& _type); + + void appendArithmeticOperatorCode(Token::Value _operator, Type const& _type); + void appendBitOperatorCode(Token::Value _operator); + void appendShiftOperatorCode(Token::Value _operator); + /// @} + + /// Stores the value on top of the stack in the current lvalue and copies that value to the + /// top of the stack again + void storeInLValue(Expression const& _expression); + /// The same as storeInLValue but do not again retrieve the value to the top of the stack. + void moveToLValue(Expression const& _expression); + /// Returns the position of @a m_currentLValue in the stack, where 0 is the top of the stack. + unsigned stackPositionOfLValue() const; + void adjustStackOffset(eth::Instruction _instruction); + + Declaration* m_currentLValue; + CompilerContext& m_context; +}; + + +} +} diff --git a/NameAndTypeResolver.cpp b/NameAndTypeResolver.cpp index 9626ca84..4b77ed13 100644 --- a/NameAndTypeResolver.cpp +++ b/NameAndTypeResolver.cpp @@ -55,12 +55,15 @@ void NameAndTypeResolver::resolveNamesAndTypes(ContractDefinition& _contract) m_currentScope = &m_scopes[function.get()]; function->getBody().checkTypeRequirements(); } + m_currentScope = &m_scopes[nullptr]; } -void NameAndTypeResolver::reset() +Declaration* NameAndTypeResolver::resolveName(ASTString const& _name, Declaration const* _scope) const { - m_scopes.clear(); - m_currentScope = nullptr; + auto iterator = m_scopes.find(_scope); + if (iterator == end(m_scopes)) + return nullptr; + return iterator->second.resolveName(_name, false); } Declaration* NameAndTypeResolver::getNameFromCurrentScope(ASTString const& _name, bool _recursive) @@ -68,8 +71,13 @@ Declaration* NameAndTypeResolver::getNameFromCurrentScope(ASTString const& _name return m_currentScope->resolveName(_name, _recursive); } +void NameAndTypeResolver::reset() +{ + m_scopes.clear(); + m_currentScope = nullptr; +} -DeclarationRegistrationHelper::DeclarationRegistrationHelper(map<ASTNode*, Scope>& _scopes, +DeclarationRegistrationHelper::DeclarationRegistrationHelper(map<ASTNode const*, Scope>& _scopes, ASTNode& _astRoot): m_scopes(_scopes), m_currentScope(&m_scopes[nullptr]) { @@ -101,27 +109,33 @@ void DeclarationRegistrationHelper::endVisit(StructDefinition&) bool DeclarationRegistrationHelper::visit(FunctionDefinition& _function) { registerDeclaration(_function, true); + m_currentFunction = &_function; return true; } void DeclarationRegistrationHelper::endVisit(FunctionDefinition&) { + m_currentFunction = nullptr; closeCurrentScope(); } -bool DeclarationRegistrationHelper::visit(VariableDeclaration& _declaration) +void DeclarationRegistrationHelper::endVisit(VariableDefinition& _variableDefinition) { - registerDeclaration(_declaration, false); - return true; + // Register the local variables with the function + // This does not fit here perfectly, but it saves us another AST visit. + assert(m_currentFunction); + m_currentFunction->addLocalVariable(_variableDefinition.getDeclaration()); } -void DeclarationRegistrationHelper::endVisit(VariableDeclaration&) +bool DeclarationRegistrationHelper::visit(VariableDeclaration& _declaration) { + registerDeclaration(_declaration, false); + return true; } void DeclarationRegistrationHelper::enterNewSubScope(ASTNode& _node) { - map<ASTNode*, Scope>::iterator iter; + map<ASTNode const*, Scope>::iterator iter; bool newlyAdded; tie(iter, newlyAdded) = m_scopes.emplace(&_node, Scope(m_currentScope)); assert(newlyAdded); diff --git a/NameAndTypeResolver.h b/NameAndTypeResolver.h index 7abcbb0c..cdc334a6 100644 --- a/NameAndTypeResolver.h +++ b/NameAndTypeResolver.h @@ -41,6 +41,14 @@ public: NameAndTypeResolver() {} void resolveNamesAndTypes(ContractDefinition& _contract); + + /// Resolves the given @a _name inside the scope @a _scope. If @a _scope is omitted, + /// the global scope is used (i.e. the one containing only the contract). + /// @returns a pointer to the declaration on success or nullptr on failure. + Declaration* resolveName(ASTString const& _name, Declaration const* _scope = nullptr) const; + + /// Resolves a name in the "current" scope. Should only be called during the initial + /// resolving phase. Declaration* getNameFromCurrentScope(ASTString const& _name, bool _recursive = true); private: @@ -48,7 +56,7 @@ private: //! Maps nodes declaring a scope to scopes, i.e. ContractDefinition, FunctionDeclaration and //! StructDefinition (@todo not yet implemented), where nullptr denotes the global scope. - std::map<ASTNode*, Scope> m_scopes; + std::map<ASTNode const*, Scope> m_scopes; Scope* m_currentScope; }; @@ -58,7 +66,7 @@ private: class DeclarationRegistrationHelper: private ASTVisitor { public: - DeclarationRegistrationHelper(std::map<ASTNode*, Scope>& _scopes, ASTNode& _astRoot); + DeclarationRegistrationHelper(std::map<ASTNode const*, Scope>& _scopes, ASTNode& _astRoot); private: bool visit(ContractDefinition& _contract); @@ -67,15 +75,16 @@ private: void endVisit(StructDefinition& _struct); bool visit(FunctionDefinition& _function); void endVisit(FunctionDefinition& _function); + void endVisit(VariableDefinition& _variableDefinition); bool visit(VariableDeclaration& _declaration); - void endVisit(VariableDeclaration& _declaration); void enterNewSubScope(ASTNode& _node); void closeCurrentScope(); void registerDeclaration(Declaration& _declaration, bool _opensScope); - std::map<ASTNode*, Scope>& m_scopes; + std::map<ASTNode const*, Scope>& m_scopes; Scope* m_currentScope; + FunctionDefinition* m_currentFunction; }; //! Resolves references to declarations (of variables and types) and also establishes the link @@ -266,9 +266,11 @@ ASTPointer<Statement> Parser::parseStatement() // starting from here, all statements must be terminated by a semicolon case Token::CONTINUE: statement = ASTNodeFactory(*this).createNode<Continue>(); + m_scanner->next(); break; case Token::BREAK: statement = ASTNodeFactory(*this).createNode<Break>(); + m_scanner->next(); break; case Token::RETURN: { @@ -283,9 +285,9 @@ ASTPointer<Statement> Parser::parseStatement() } break; default: - // distinguish between variable definition (and potentially assignment) and expressions + // distinguish between variable definition (and potentially assignment) and expression statement // (which include assignments to other expressions and pre-declared variables) - // We have a variable definition if we ge a keyword that specifies a type name, or + // We have a variable definition if we get a keyword that specifies a type name, or // in the case of a user-defined type, we have two identifiers following each other. if (m_scanner->getCurrentToken() == Token::MAPPING || m_scanner->getCurrentToken() == Token::VAR || @@ -293,8 +295,8 @@ ASTPointer<Statement> Parser::parseStatement() (m_scanner->getCurrentToken() == Token::IDENTIFIER && m_scanner->peekNextToken() == Token::IDENTIFIER)) statement = parseVariableDefinition(); - else // "ordinary" expression - statement = parseExpression(); + else // "ordinary" expression statement + statement = parseExpressionStatement(); } expectToken(Token::SEMICOLON); return statement; @@ -349,6 +351,14 @@ ASTPointer<VariableDefinition> Parser::parseVariableDefinition() return nodeFactory.createNode<VariableDefinition>(variable, value); } +ASTPointer<ExpressionStatement> Parser::parseExpressionStatement() +{ + ASTNodeFactory nodeFactory(*this); + ASTPointer<Expression> expression = parseExpression(); + nodeFactory.setEndPositionFromNode(expression); + return nodeFactory.createNode<ExpressionStatement>(expression); +} + ASTPointer<Expression> Parser::parseExpression() { ASTNodeFactory nodeFactory(*this); @@ -453,8 +463,7 @@ ASTPointer<Expression> Parser::parsePrimaryExpression() { case Token::TRUE_LITERAL: case Token::FALSE_LITERAL: - expression = nodeFactory.createNode<Literal>(token, ASTPointer<ASTString>()); - m_scanner->next(); + expression = nodeFactory.createNode<Literal>(token, getLiteralAndAdvance()); break; case Token::NUMBER: case Token::STRING_LITERAL: @@ -58,6 +58,7 @@ private: ASTPointer<IfStatement> parseIfStatement(); ASTPointer<WhileStatement> parseWhileStatement(); ASTPointer<VariableDefinition> parseVariableDefinition(); + ASTPointer<ExpressionStatement> parseExpressionStatement(); ASTPointer<Expression> parseExpression(); ASTPointer<Expression> parseBinaryExpression(int _minPrecedence = 4); ASTPointer<Expression> parseUnaryExpression(); @@ -159,14 +159,12 @@ std::string IntegerType::toString() const return prefix + dev::toString(m_bits); } -bytes IntegerType::literalToBigEndian(const Literal& _literal) const +u256 IntegerType::literalValue(const Literal& _literal) const { bigint value(_literal.getValue()); - if (!isSigned() && value < 0) - return bytes(); // @todo this should already be caught by "smallestTypeforLiteral" - //@todo check that the number of bits is correct - //@todo does "toCompactBigEndian" work for signed numbers? - return toCompactBigEndian(value); + //@todo check that the number is not too large + //@todo does this work for signed numbers? + return u256(value); } bool BoolType::isExplicitlyConvertibleTo(Type const& _convertTo) const @@ -182,14 +180,14 @@ bool BoolType::isExplicitlyConvertibleTo(Type const& _convertTo) const return isImplicitlyConvertibleTo(_convertTo); } -bytes BoolType::literalToBigEndian(const Literal& _literal) const +u256 BoolType::literalValue(const Literal& _literal) const { if (_literal.getToken() == Token::TRUE_LITERAL) - return bytes(1, 1); + return u256(1); else if (_literal.getToken() == Token::FALSE_LITERAL) - return bytes(1, 0); + return u256(0); else - return NullBytes; + assert(false); } bool ContractType::operator==(const Type& _other) const @@ -69,7 +69,7 @@ public: virtual bool operator!=(Type const& _other) const { return !this->operator ==(_other); } virtual std::string toString() const = 0; - virtual bytes literalToBigEndian(Literal const&) const { return NullBytes; } + virtual u256 literalValue(Literal const&) const { assert(false); } }; /// Any kind of integer type including hash and address. @@ -94,7 +94,7 @@ public: virtual bool operator==(Type const& _other) const override; virtual std::string toString() const override; - virtual bytes literalToBigEndian(Literal const& _literal) const override; + virtual u256 literalValue(Literal const& _literal) const override; int getNumBits() const { return m_bits; } bool isHash() const { return m_modifier == Modifier::HASH || m_modifier == Modifier::ADDRESS; } @@ -122,7 +122,7 @@ public: } virtual std::string toString() const override { return "bool"; } - virtual bytes literalToBigEndian(Literal const& _literal) const override; + virtual u256 literalValue(Literal const& _literal) const override; }; /// The type of a contract instance, there is one distinct type for each contract definition. |