diff options
author | chriseth <c@ethdev.com> | 2015-10-21 06:21:52 +0800 |
---|---|---|
committer | chriseth <c@ethdev.com> | 2015-10-21 06:46:01 +0800 |
commit | e3dffb611fe1736e3ffa170e6d8dc4dee17366bd (patch) | |
tree | b2df13e7c4c16c01b6cdc7cd5c15932031185d95 /libsolidity/parsing | |
parent | d41f8b7ce702c3b25c48d27e2e895ccdcd04e4e0 (diff) | |
download | dexon-solidity-e3dffb611fe1736e3ffa170e6d8dc4dee17366bd.tar.gz dexon-solidity-e3dffb611fe1736e3ffa170e6d8dc4dee17366bd.tar.zst dexon-solidity-e3dffb611fe1736e3ffa170e6d8dc4dee17366bd.zip |
File reorganisation.
Diffstat (limited to 'libsolidity/parsing')
-rw-r--r-- | libsolidity/parsing/Parser.cpp | 1234 | ||||
-rw-r--r-- | libsolidity/parsing/Parser.h | 164 | ||||
-rw-r--r-- | libsolidity/parsing/Scanner.cpp | 771 | ||||
-rw-r--r-- | libsolidity/parsing/Scanner.h | 224 | ||||
-rw-r--r-- | libsolidity/parsing/Token.cpp | 100 | ||||
-rw-r--r-- | libsolidity/parsing/Token.h | 407 |
6 files changed, 2900 insertions, 0 deletions
diff --git a/libsolidity/parsing/Parser.cpp b/libsolidity/parsing/Parser.cpp
new file mode 100644
index 00000000..57f3e913
--- /dev/null
+++ b/libsolidity/parsing/Parser.cpp
@@ -0,0 +1,1234 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @author Christian <c@ethdev.com>
+ * @date 2014
+ * Solidity parser.
+ */
+
+#include <vector>
+#include <libdevcore/Log.h>
+#include <libevmasm/SourceLocation.h>
+#include <libsolidity/parsing/Parser.h>
+#include <libsolidity/parsing/Scanner.h>
+#include <libsolidity/interface/Exceptions.h>
+#include <libsolidity/interface/InterfaceHandler.h>
+
+using namespace std;
+
+namespace dev
+{
+namespace solidity
+{
+
+/// AST node factory that also tracks the begin and end position of an AST node
+/// while it is being parsed
+class Parser::ASTNodeFactory
+{
+public:
+	/// Starts tracking at the parser's current position; the end position is
+	/// initialised to -1, which createNode() treats as "not set yet".
+	ASTNodeFactory(Parser const& _parser):
+		m_parser(_parser), m_location(_parser.position(), -1, _parser.sourceName()) {}
+	/// Starts tracking with a copy of the location of an already parsed child node.
+	ASTNodeFactory(Parser const& _parser, ASTPointer<ASTNode> const& _childNode):
+		m_parser(_parser), m_location(_childNode->location()) {}
+
+	/// Sets the end position to the parser's current end position.
+	void markEndPosition() { m_location.end = m_parser.endPosition(); }
+	/// Replaces the tracked location as a whole.
+	void setLocation(SourceLocation const& _location) { m_location = _location; }
+	/// Collapses the tracked location so that its end equals its start.
+	void setLocationEmpty() { m_location.end = m_location.start; }
+	/// Set the end position to the one of the given node.
+	void setEndPositionFromNode(ASTPointer<ASTNode> const& _node) { m_location.end = _node->location().end; }
+
+	/// Creates a node of type NodeType, passing the tracked location followed by
+	/// the forwarded arguments to its constructor.
+	/// If no end position has been set (end < 0), it is taken from the parser's
+	/// current end position at the time of this call.
+	template <class NodeType, typename... Args>
+	ASTPointer<NodeType> createNode(Args&& ... _args)
+	{
+		if (m_location.end < 0)
+			markEndPosition();
+		return make_shared<NodeType>(m_location, forward<Args>(_args)...);
+	}
+
+private:
+	Parser const& m_parser;
+	SourceLocation m_location;
+};
+
+/// Parses a source unit from the given scanner: a sequence of import directives
+/// and contract / library definitions up to the end-of-stream token.
+/// @returns the source unit node, or a null pointer if a FatalError was caught
+/// while m_errors is non-empty. A FatalError caught with an empty m_errors is rethrown.
+ASTPointer<SourceUnit> Parser::parse(shared_ptr<Scanner> const& _scanner)
+{
+	try
+	{
+		m_scanner = _scanner;
+		ASTNodeFactory nodeFactory(*this);
+		vector<ASTPointer<ASTNode>> nodes;
+		while (m_scanner->currentToken() != Token::EOS)
+		{
+			switch (auto token = m_scanner->currentToken())
+			{
+			case Token::Import:
+				nodes.push_back(parseImportDirective());
+				break;
+			case Token::Contract:
+			case Token::Library:
+				// Both keywords share one parse routine; the flag tells it which one to expect.
+				nodes.push_back(parseContractDefinition(token == Token::Library));
+				break;
+			default:
+				fatalParserError(std::string("Expected import directive or contract definition."));
+			}
+		}
+		return nodeFactory.createNode<SourceUnit>(nodes);
+	}
+	catch (FatalError const& _error)
+	{
+		if (m_errors.empty())
+			throw; // Something is weird here, rather throw again.
+		return nullptr;
+	}
+}
+
+/// @returns the source name reported by the current scanner.
+std::shared_ptr<const string> const& Parser::sourceName() const
+{
+	return m_scanner->sourceName();
+}
+
+/// @returns the start offset of the scanner's current location.
+int Parser::position() const
+{
+	return m_scanner->currentLocation().start;
+}
+
+/// @returns the end offset of the scanner's current location.
+int Parser::endPosition() const
+{
+	return m_scanner->currentLocation().end;
+}
+
+/// Parses an import directive: the `import` keyword, a string literal and a semicolon.
+/// Raises a fatal parser error if the token after `import` is not a string literal.
+ASTPointer<ImportDirective> Parser::parseImportDirective()
+{
+	ASTNodeFactory nodeFactory(*this);
+	expectToken(Token::Import);
+	if (m_scanner->currentToken() != Token::StringLiteral)
+		fatalParserError(std::string("Expected string literal (URL)."));
+	ASTPointer<ASTString> url = getLiteralAndAdvance();
+	// The end position is fixed right before the terminating semicolon is consumed.
+	nodeFactory.markEndPosition();
+	expectToken(Token::Semicolon);
+	return nodeFactory.createNode<ImportDirective>(url);
+}
+
+/// Parses a contract or library definition: keyword, name, an optional `is` list
+/// of inheritance specifiers and a brace-enclosed body made up of functions,
+/// structs, enums, state variables, modifiers and events.
+/// A non-empty comment literal reported by the scanner on entry is stored as doc string.
+/// @param _isLibrary if true, Token::Library is expected as keyword instead of
+/// Token::Contract; the flag is also handed to the created node.
+ASTPointer<ContractDefinition> Parser::parseContractDefinition(bool _isLibrary)
+{
+	ASTNodeFactory nodeFactory(*this);
+	ASTPointer<ASTString> docString;
+	if (m_scanner->currentCommentLiteral() != "")
+		docString = make_shared<ASTString>(m_scanner->currentCommentLiteral());
+	expectToken(_isLibrary ? Token::Library : Token::Contract);
+	ASTPointer<ASTString> name = expectIdentifierToken();
+	vector<ASTPointer<InheritanceSpecifier>> baseContracts;
+	vector<ASTPointer<StructDefinition>> structs;
+	vector<ASTPointer<EnumDefinition>> enums;
+	vector<ASTPointer<VariableDeclaration>> stateVariables;
+	vector<ASTPointer<FunctionDefinition>> functions;
+	vector<ASTPointer<ModifierDefinition>> modifiers;
+	vector<ASTPointer<EventDefinition>> events;
+	if (m_scanner->currentToken() == Token::Is)
+		// The first next() skips `is`, every later one skips the separating comma.
+		do
+		{
+			m_scanner->next();
+			baseContracts.push_back(parseInheritanceSpecifier());
+		}
+		while (m_scanner->currentToken() == Token::Comma);
+	expectToken(Token::LBrace);
+	while (true)
+	{
+		Token::Value currentTokenValue= m_scanner->currentToken();
+		if (currentTokenValue == Token::RBrace)
+			break;
+		else if (currentTokenValue == Token::Function)
+			// The contract name is passed on so that the function parser can compare it
+			// with the function name.
+			functions.push_back(parseFunctionDefinition(name.get()));
+		else if (currentTokenValue == Token::Struct)
+			structs.push_back(parseStructDefinition());
+		else if (currentTokenValue == Token::Enum)
+			enums.push_back(parseEnumDefinition());
+		else if (
+			currentTokenValue == Token::Identifier ||
+			currentTokenValue == Token::Mapping ||
+			Token::isElementaryTypeName(currentTokenValue)
+		)
+		{
+			// Anything that can start a type name is parsed as a state variable declaration.
+			VarDeclParserOptions options;
+			options.isStateVariable = true;
+			options.allowInitialValue = true;
+			stateVariables.push_back(parseVariableDeclaration(options));
+			expectToken(Token::Semicolon);
+		}
+		else if (currentTokenValue == Token::Modifier)
+			modifiers.push_back(parseModifierDefinition());
+		else if (currentTokenValue == Token::Event)
+			events.push_back(parseEventDefinition());
+		else
+			fatalParserError(std::string("Function, variable, struct or modifier declaration expected."));
+	}
+	nodeFactory.markEndPosition();
+	expectToken(Token::RBrace);
+	return nodeFactory.createNode<ContractDefinition>(
+		name,
+		docString,
+		baseContracts,
+		structs,
+		enums,
+		stateVariables,
+		functions,
+		modifiers,
+		events,
+		_isLibrary
+	);
+}
+
+/// Parses an inheritance specifier: an identifier optionally followed by a
+/// parenthesised, comma-separated argument list.
+ASTPointer<InheritanceSpecifier> Parser::parseInheritanceSpecifier()
+{
+	ASTNodeFactory nodeFactory(*this);
+	ASTPointer<Identifier> name(parseIdentifier());
+	vector<ASTPointer<Expression>> arguments;
+	if (m_scanner->currentToken() == Token::LParen)
+	{
+		m_scanner->next();
+		arguments = parseFunctionCallListArguments();
+		nodeFactory.markEndPosition();
+		expectToken(Token::RParen);
+	}
+	else
+		// Without arguments the specifier ends where the name ends.
+		nodeFactory.setEndPositionFromNode(name);
+	return nodeFactory.createNode<InheritanceSpecifier>(name, arguments);
+}
+
+/// Translates a visibility token into the matching Declaration::Visibility value
+/// and advances the scanner by one token.
+/// Asserts if _token is not one of Public, Internal, Private or External.
+Declaration::Visibility Parser::parseVisibilitySpecifier(Token::Value _token)
+{
+	Declaration::Visibility visibility(Declaration::Visibility::Default);
+	if (_token == Token::Public)
+		visibility = Declaration::Visibility::Public;
+	else if (_token == Token::Internal)
+		visibility = Declaration::Visibility::Internal;
+	else if (_token == Token::Private)
+		visibility = Declaration::Visibility::Private;
+	else if (_token == Token::External)
+		visibility = Declaration::Visibility::External;
+	else
+		solAssert(false, "Invalid visibility specifier.");
+	m_scanner->next();
+	return visibility;
+}
+
+/// Parses a function definition: `function`, an optional name, a parameter list,
+/// any sequence of `constant` / modifier invocations / one visibility specifier,
+/// an optional `returns` parameter list and either a body block or a semicolon.
+/// A non-empty comment literal reported by the scanner on entry is stored as doc string.
+/// @param _contractName if non-null and equal to the parsed function name, the
+/// created node is flagged as constructor.
+ASTPointer<FunctionDefinition> Parser::parseFunctionDefinition(ASTString const* _contractName)
+{
+	ASTNodeFactory nodeFactory(*this);
+	ASTPointer<ASTString> docstring;
+	if (m_scanner->currentCommentLiteral() != "")
+		docstring = make_shared<ASTString>(m_scanner->currentCommentLiteral());
+
+	expectToken(Token::Function);
+	ASTPointer<ASTString> name;
+	if (m_scanner->currentToken() == Token::LParen)
+		name = make_shared<ASTString>(); // anonymous function
+	else
+		name = expectIdentifierToken();
+	VarDeclParserOptions options;
+	options.allowLocationSpecifier = true;
+	ASTPointer<ParameterList> parameters(parseParameterList(options));
+	bool isDeclaredConst = false;
+	Declaration::Visibility visibility(Declaration::Visibility::Default);
+	vector<ASTPointer<ModifierInvocation>> modifiers;
+	// Function header attributes may appear in any order; the loop ends at the first
+	// token that is none of them.
+	while (true)
+	{
+		Token::Value token = m_scanner->currentToken();
+		if (token == Token::Const)
+		{
+			isDeclaredConst = true;
+			m_scanner->next();
+		}
+		else if (token == Token::Identifier)
+			modifiers.push_back(parseModifierInvocation());
+		else if (Token::isVisibilitySpecifier(token))
+		{
+			if (visibility != Declaration::Visibility::Default)
+				fatalParserError(std::string("Multiple visibility specifiers."));
+			visibility = parseVisibilitySpecifier(token);
+		}
+		else
+			break;
+	}
+	ASTPointer<ParameterList> returnParameters;
+	if (m_scanner->currentToken() == Token::Returns)
+	{
+		// `returns ()` is rejected: the return list has to contain at least one entry.
+		bool const permitEmptyParameterList = false;
+		m_scanner->next();
+		returnParameters = parseParameterList(options, permitEmptyParameterList);
+	}
+	else
+		returnParameters = createEmptyParameterList();
+	ASTPointer<Block> block = ASTPointer<Block>();
+	nodeFactory.markEndPosition();
+	if (m_scanner->currentToken() != Token::Semicolon)
+	{
+		block = parseBlock();
+		nodeFactory.setEndPositionFromNode(block);
+	}
+	else
+		m_scanner->next(); // just consume the ';'
+	bool const c_isConstructor = (_contractName && *name == *_contractName);
+	return nodeFactory.createNode<FunctionDefinition>(
+		name,
+		visibility,
+		c_isConstructor,
+		docstring,
+		parameters,
+		isDeclaredConst,
+		modifiers,
+		returnParameters,
+		block
+	);
+}
+
+/// Parses a struct definition: `struct`, a name and a brace-enclosed list of
+/// member variable declarations, each terminated by a semicolon.
+ASTPointer<StructDefinition> Parser::parseStructDefinition()
+{
+	ASTNodeFactory nodeFactory(*this);
+	expectToken(Token::Struct);
+	ASTPointer<ASTString> name = expectIdentifierToken();
+	vector<ASTPointer<VariableDeclaration>> members;
+	expectToken(Token::LBrace);
+	while (m_scanner->currentToken() != Token::RBrace)
+	{
+		members.push_back(parseVariableDeclaration());
+		expectToken(Token::Semicolon);
+	}
+	nodeFactory.markEndPosition();
+	expectToken(Token::RBrace);
+	return nodeFactory.createNode<StructDefinition>(name, members);
+}
+
+/// Parses a single enum member name.
+ASTPointer<EnumValue> Parser::parseEnumValue()
+{
+	ASTNodeFactory nodeFactory(*this);
+	// Mark the end before expectIdentifierToken() is evaluated as an argument below.
+	nodeFactory.markEndPosition();
+	return nodeFactory.createNode<EnumValue>(expectIdentifierToken());
+}
+
+/// Parses an enum definition: `enum`, a name and a brace-enclosed, comma-separated
+/// list of member names. A comma has to be followed by another identifier.
+ASTPointer<EnumDefinition> Parser::parseEnumDefinition()
+{
+	ASTNodeFactory nodeFactory(*this);
+	expectToken(Token::Enum);
+	ASTPointer<ASTString> name = expectIdentifierToken();
+	vector<ASTPointer<EnumValue>> members;
+	expectToken(Token::LBrace);
+
+	while (m_scanner->currentToken() != Token::RBrace)
+	{
+		members.push_back(parseEnumValue());
+		if (m_scanner->currentToken() == Token::RBrace)
+			break;
+		expectToken(Token::Comma);
+		if (m_scanner->currentToken() != Token::Identifier)
+			fatalParserError(std::string("Expected Identifier after ','"));
+	}
+
+	nodeFactory.markEndPosition();
+	expectToken(Token::RBrace);
+	return nodeFactory.createNode<EnumDefinition>(name, members);
+}
+
+/// Parses a variable declaration: type name, optional attributes (visibility,
+/// `indexed`, `constant`, location specifier - each only where _options allows it),
+/// the variable name and, if allowed, an initial value.
+/// @param _options selects which attributes, an empty name, `var` and an initial
+/// value are accepted.
+/// @param _lookAheadArrayType a type name that was already parsed by the caller;
+/// if set, no type name is parsed here and the node starts at that type's location.
+ASTPointer<VariableDeclaration> Parser::parseVariableDeclaration(
+	VarDeclParserOptions const& _options,
+	ASTPointer<TypeName> const& _lookAheadArrayType
+)
+{
+	ASTNodeFactory nodeFactory = _lookAheadArrayType ?
+		ASTNodeFactory(*this, _lookAheadArrayType) : ASTNodeFactory(*this);
+	ASTPointer<TypeName> type;
+	if (_lookAheadArrayType)
+		type = _lookAheadArrayType;
+	else
+	{
+		type = parseTypeName(_options.allowVar);
+		// parseTypeName() yields a null pointer for `var`.
+		if (type != nullptr)
+			nodeFactory.setEndPositionFromNode(type);
+	}
+	bool isIndexed = false;
+	bool isDeclaredConst = false;
+	Declaration::Visibility visibility(Declaration::Visibility::Default);
+	VariableDeclaration::Location location = VariableDeclaration::Location::Default;
+	ASTPointer<ASTString> identifier;
+
+	while (true)
+	{
+		Token::Value token = m_scanner->currentToken();
+		if (_options.isStateVariable && Token::isVariableVisibilitySpecifier(token))
+		{
+			if (visibility != Declaration::Visibility::Default)
+				fatalParserError(std::string("Visibility already specified."));
+			// parseVisibilitySpecifier() advances the scanner itself.
+			visibility = parseVisibilitySpecifier(token);
+		}
+		else
+		{
+			if (_options.allowIndexed && token == Token::Indexed)
+				isIndexed = true;
+			else if (token == Token::Const)
+				isDeclaredConst = true;
+			else if (_options.allowLocationSpecifier && Token::isLocationSpecifier(token))
+			{
+				if (location != VariableDeclaration::Location::Default)
+					fatalParserError(std::string("Location already specified."));
+				if (!type)
+					fatalParserError(std::string("Location specifier needs explicit type name."));
+				location = (
+					token == Token::Memory ?
+					VariableDeclaration::Location::Memory :
+					VariableDeclaration::Location::Storage
+				);
+			}
+			else
+				break;
+			// Consume the attribute token recognised by one of the branches above.
+			m_scanner->next();
+		}
+	}
+	nodeFactory.markEndPosition();
+
+	if (_options.allowEmptyName && m_scanner->currentToken() != Token::Identifier)
+	{
+		// Unnamed declaration: it ends where its type ends.
+		identifier = make_shared<ASTString>("");
+		solAssert(type != nullptr, "");
+		nodeFactory.setEndPositionFromNode(type);
+	}
+	else
+		identifier = expectIdentifierToken();
+	ASTPointer<Expression> value;
+	if (_options.allowInitialValue)
+	{
+		if (m_scanner->currentToken() == Token::Assign)
+		{
+			m_scanner->next();
+			value = parseExpression();
+			nodeFactory.setEndPositionFromNode(value);
+		}
+	}
+	return nodeFactory.createNode<VariableDeclaration>(
+		type,
+		identifier,
+		value,
+		visibility,
+		_options.isStateVariable,
+		isIndexed,
+		isDeclaredConst,
+		location
+	);
+}
+
+/// Parses a modifier definition: `modifier`, a name, an optional parameter list
+/// and a body block. m_insideModifier is set while the definition is parsed and
+/// reset by a scope guard on exit.
+/// A non-empty comment literal reported by the scanner on entry is stored as doc string.
+ASTPointer<ModifierDefinition> Parser::parseModifierDefinition()
+{
+	ScopeGuard resetModifierFlag([this]() { m_insideModifier = false; });
+	m_insideModifier = true;
+
+	ASTNodeFactory nodeFactory(*this);
+	ASTPointer<ASTString> docstring;
+	if (m_scanner->currentCommentLiteral() != "")
+		docstring = make_shared<ASTString>(m_scanner->currentCommentLiteral());
+
+	expectToken(Token::Modifier);
+	ASTPointer<ASTString> name(expectIdentifierToken());
+	ASTPointer<ParameterList> parameters;
+	if (m_scanner->currentToken() == Token::LParen)
+	{
+		VarDeclParserOptions options;
+		options.allowIndexed = true;
+		options.allowLocationSpecifier = true;
+		parameters = parseParameterList(options);
+	}
+	else
+		parameters = createEmptyParameterList();
+	ASTPointer<Block> block = parseBlock();
+	nodeFactory.setEndPositionFromNode(block);
+	return nodeFactory.createNode<ModifierDefinition>(name, docstring, parameters, block);
+}
+
+/// Parses an event definition: `event`, a name, an optional parameter list
+/// (parameters may be `indexed`), an optional `anonymous` and a semicolon.
+/// A non-empty comment literal reported by the scanner on entry is stored as doc string.
+ASTPointer<EventDefinition> Parser::parseEventDefinition()
+{
+	ASTNodeFactory nodeFactory(*this);
+	ASTPointer<ASTString> docstring;
+	if (m_scanner->currentCommentLiteral() != "")
+		docstring = make_shared<ASTString>(m_scanner->currentCommentLiteral());
+
+	expectToken(Token::Event);
+	ASTPointer<ASTString> name(expectIdentifierToken());
+	ASTPointer<ParameterList> parameters;
+	if (m_scanner->currentToken() == Token::LParen)
+	{
+		VarDeclParserOptions options;
+		options.allowIndexed = true;
+		parameters = parseParameterList(options);
+	}
+	else
+		parameters = createEmptyParameterList();
+	bool anonymous = false;
+	if (m_scanner->currentToken() == Token::Anonymous)
+	{
+		anonymous = true;
+		m_scanner->next();
+	}
+	nodeFactory.markEndPosition();
+	expectToken(Token::Semicolon);
+	return nodeFactory.createNode<EventDefinition>(name, docstring, parameters, anonymous);
+}
+
+/// Parses a modifier invocation: an identifier optionally followed by a
+/// parenthesised, comma-separated argument list.
+ASTPointer<ModifierInvocation> Parser::parseModifierInvocation()
+{
+	ASTNodeFactory nodeFactory(*this);
+	ASTPointer<Identifier> name(parseIdentifier());
+	vector<ASTPointer<Expression>> arguments;
+	if (m_scanner->currentToken() == Token::LParen)
+	{
+		m_scanner->next();
+		arguments = parseFunctionCallListArguments();
+		nodeFactory.markEndPosition();
+		expectToken(Token::RParen);
+	}
+	else
+		nodeFactory.setEndPositionFromNode(name);
+	return nodeFactory.createNode<ModifierInvocation>(name, arguments);
+}
+
+/// Parses a single identifier token into an Identifier node.
+ASTPointer<Identifier> Parser::parseIdentifier()
+{
+	ASTNodeFactory nodeFactory(*this);
+	// Mark the end before expectIdentifierToken() is evaluated as an argument below.
+	nodeFactory.markEndPosition();
+	return nodeFactory.createNode<Identifier>(expectIdentifierToken());
+}
+
+/// Parses a type name: an elementary type, `var`, a mapping or a (possibly dotted)
+/// user-defined type name, followed by any number of `[...]` array postfixes.
+/// @param _allowVar if false, `var` raises a fatal parser error.
+/// @returns the type name node, or a null pointer if `var` was parsed.
+ASTPointer<TypeName> Parser::parseTypeName(bool _allowVar)
+{
+	ASTNodeFactory nodeFactory(*this);
+	ASTPointer<TypeName> type;
+	Token::Value token = m_scanner->currentToken();
+	if (Token::isElementaryTypeName(token))
+	{
+		type = ASTNodeFactory(*this).createNode<ElementaryTypeName>(token);
+		m_scanner->next();
+	}
+	else if (token == Token::Var)
+	{
+		if (!_allowVar)
+			fatalParserError(std::string("Expected explicit type name."));
+		// `type` intentionally stays null for `var`.
+		m_scanner->next();
+	}
+	else if (token == Token::Mapping)
+		type = parseMapping();
+	else if (token == Token::Identifier)
+	{
+		// This factory shadows the outer one and is only used for the user-defined type name.
+		ASTNodeFactory nodeFactory(*this);
+		nodeFactory.markEndPosition();
+		vector<ASTString> identifierPath{*expectIdentifierToken()};
+		while (m_scanner->currentToken() == Token::Period)
+		{
+			m_scanner->next();
+			nodeFactory.markEndPosition();
+			identifierPath.push_back(*expectIdentifierToken());
+		}
+		type = nodeFactory.createNode<UserDefinedTypeName>(identifierPath);
+	}
+	else
+		fatalParserError(std::string("Expected type name"));
+
+	if (type)
+		// Parse "[...]" postfixes for arrays.
+		while (m_scanner->currentToken() == Token::LBrack)
+		{
+			m_scanner->next();
+			ASTPointer<Expression> length;
+			if (m_scanner->currentToken() != Token::RBrack)
+				length = parseExpression();
+			nodeFactory.markEndPosition();
+			expectToken(Token::RBrack);
+			type = nodeFactory.createNode<ArrayTypeName>(type, length);
+		}
+	return type;
+}
+
+/// Parses a mapping type: `mapping`, `(`, an elementary key type, the arrow token,
+/// a value type (for which `var` is not allowed) and `)`.
+ASTPointer<Mapping> Parser::parseMapping()
+{
+	ASTNodeFactory nodeFactory(*this);
+	expectToken(Token::Mapping);
+	expectToken(Token::LParen);
+	if (!Token::isElementaryTypeName(m_scanner->currentToken()))
+		fatalParserError(std::string("Expected elementary type name for mapping key type"));
+	ASTPointer<ElementaryTypeName> keyType;
+	keyType = ASTNodeFactory(*this).createNode<ElementaryTypeName>(m_scanner->currentToken());
+	m_scanner->next();
+	expectToken(Token::Arrow);
+	bool const allowVar = false;
+	ASTPointer<TypeName> valueType = parseTypeName(allowVar);
+	nodeFactory.markEndPosition();
+	expectToken(Token::RParen);
+	return nodeFactory.createNode<Mapping>(keyType, valueType);
+}
+
+/// Parses a parenthesised, comma-separated list of variable declarations whose
+/// names may be omitted.
+/// @param _options options for the individual declarations; allowEmptyName is
+/// always switched on for the copy used here.
+/// @param _allowEmpty if false, at least one declaration is parsed even if the
+/// token after `(` is `)`.
+ASTPointer<ParameterList> Parser::parseParameterList(
+	VarDeclParserOptions const& _options,
+	bool _allowEmpty
+)
+{
+	ASTNodeFactory nodeFactory(*this);
+	vector<ASTPointer<VariableDeclaration>> parameters;
+	VarDeclParserOptions options(_options);
+	options.allowEmptyName = true;
+	expectToken(Token::LParen);
+	if (!_allowEmpty || m_scanner->currentToken() != Token::RParen)
+	{
+		parameters.push_back(parseVariableDeclaration(options));
+		while (m_scanner->currentToken() != Token::RParen)
+		{
+			expectToken(Token::Comma);
+			parameters.push_back(parseVariableDeclaration(options));
+		}
+	}
+	nodeFactory.markEndPosition();
+	// On both paths that reach this point the current token is ')'.
+	m_scanner->next();
+	return nodeFactory.createNode<ParameterList>(parameters);
+}
+
+/// Parses a brace-enclosed sequence of statements.
+ASTPointer<Block> Parser::parseBlock()
+{
+	ASTNodeFactory nodeFactory(*this);
+	expectToken(Token::LBrace);
+	vector<ASTPointer<Statement>> statements;
+	while (m_scanner->currentToken() != Token::RBrace)
+		statements.push_back(parseStatement());
+	nodeFactory.markEndPosition();
+	expectToken(Token::RBrace);
+	return nodeFactory.createNode<Block>(statements);
+}
+
+/// Parses a single statement, dispatching on the current token.
+/// `if`, `while`, `for` and blocks are returned directly; all other statements
+/// have to be followed by a semicolon, except for the `_` placeholder that is
+/// recognised while m_insideModifier is set.
+ASTPointer<Statement> Parser::parseStatement()
+{
+	ASTPointer<Statement> statement;
+	switch (m_scanner->currentToken())
+	{
+	case Token::If:
+		return parseIfStatement();
+	case Token::While:
+		return parseWhileStatement();
+	case Token::For:
+		return parseForStatement();
+	case Token::LBrace:
+		return parseBlock();
+		// starting from here, all statements must be terminated by a semicolon
+	case Token::Continue:
+		statement = ASTNodeFactory(*this).createNode<Continue>();
+		m_scanner->next();
+		break;
+	case Token::Break:
+		statement = ASTNodeFactory(*this).createNode<Break>();
+		m_scanner->next();
+		break;
+	case Token::Return:
+	{
+		ASTNodeFactory nodeFactory(*this);
+		ASTPointer<Expression> expression;
+		// next() skips `return` and yields the token that follows it.
+		if (m_scanner->next() != Token::Semicolon)
+		{
+			expression = parseExpression();
+			nodeFactory.setEndPositionFromNode(expression);
+		}
+		statement = nodeFactory.createNode<Return>(expression);
+		break;
+	}
+	case Token::Throw:
+	{
+		statement = ASTNodeFactory(*this).createNode<Throw>();
+		m_scanner->next();
+		break;
+	}
+	case Token::Identifier:
+		if (m_insideModifier && m_scanner->currentLiteral() == "_")
+		{
+			// The placeholder is returned without expecting a semicolon.
+			statement = ASTNodeFactory(*this).createNode<PlaceholderStatement>();
+			m_scanner->next();
+			return statement;
+		}
+	// fall-through
+	default:
+		statement = parseSimpleStatement();
+	}
+	expectToken(Token::Semicolon);
+	return statement;
+}
+
+/// Parses `if (condition) statement` with an optional `else statement`.
+/// The node ends where the last parsed branch ends.
+ASTPointer<IfStatement> Parser::parseIfStatement()
+{
+	ASTNodeFactory nodeFactory(*this);
+	expectToken(Token::If);
+	expectToken(Token::LParen);
+	ASTPointer<Expression> condition = parseExpression();
+	expectToken(Token::RParen);
+	ASTPointer<Statement> trueBody = parseStatement();
+	ASTPointer<Statement> falseBody;
+	if (m_scanner->currentToken() == Token::Else)
+	{
+		m_scanner->next();
+		falseBody = parseStatement();
+		nodeFactory.setEndPositionFromNode(falseBody);
+	}
+	else
+		nodeFactory.setEndPositionFromNode(trueBody);
+	return nodeFactory.createNode<IfStatement>(condition, trueBody, falseBody);
+}
+
+/// Parses `while (condition) statement`.
+ASTPointer<WhileStatement> Parser::parseWhileStatement()
+{
+	ASTNodeFactory nodeFactory(*this);
+	expectToken(Token::While);
+	expectToken(Token::LParen);
+	ASTPointer<Expression> condition = parseExpression();
+	expectToken(Token::RParen);
+	ASTPointer<Statement> body = parseStatement();
+	nodeFactory.setEndPositionFromNode(body);
+	return nodeFactory.createNode<WhileStatement>(condition, body);
+}
+
+/// Parses `for (init; condition; loop) statement`. Each of the three header parts
+/// may be left out, in which case the corresponding pointer stays null.
+ASTPointer<ForStatement> Parser::parseForStatement()
+{
+	ASTNodeFactory nodeFactory(*this);
+	ASTPointer<Statement> initExpression;
+	ASTPointer<Expression> conditionExpression;
+	ASTPointer<ExpressionStatement> loopExpression;
+	expectToken(Token::For);
+	expectToken(Token::LParen);
+
+	// LTODO: Maybe here have some predicate like peekExpression() instead of checking for semicolon and RParen?
+	if (m_scanner->currentToken() != Token::Semicolon)
+		initExpression = parseSimpleStatement();
+	expectToken(Token::Semicolon);
+
+	if (m_scanner->currentToken() != Token::Semicolon)
+		conditionExpression = parseExpression();
+	expectToken(Token::Semicolon);
+
+	if (m_scanner->currentToken() != Token::RParen)
+		loopExpression = parseExpressionStatement();
+	expectToken(Token::RParen);
+
+	ASTPointer<Statement> body = parseStatement();
+	nodeFactory.setEndPositionFromNode(body);
+	return nodeFactory.createNode<ForStatement>(initExpression,
+												conditionExpression,
+												loopExpression,
+												body);
+}
+
+/// Parses a variable declaration statement or an expression statement (without
+/// the terminating semicolon). If peekStatementType() cannot decide between the
+/// two, the common prefix is parsed here and then handed to the matching parser.
+ASTPointer<Statement> Parser::parseSimpleStatement()
+{
+	// These two cases are very hard to distinguish:
+	// x[7 * 20 + 3] a; - x[7 * 20 + 3] = 9;
+	// In the first case, x is a type name, in the second it is the name of a variable.
+	// As an extension, we can even have:
+	// `x.y.z[1][2] a;` and `x.y.z[1][2] = 10;`
+	// Where in the first, x.y.z leads to a type name where in the second, it accesses structs.
+	switch (peekStatementType())
+	{
+	case LookAheadInfo::VariableDeclarationStatement:
+		return parseVariableDeclarationStatement();
+	case LookAheadInfo::ExpressionStatement:
+		return parseExpressionStatement();
+	default:
+		break;
+	}
+
+	// At this point, we have 'Identifier "["' or 'Identifier "." Identifier' or 'ElementaryTypeName "["'.
+	// We parse '(Identifier ("." Identifier)* |ElementaryTypeName) ( "[" Expression "]" )+'
+	// until we can decide whether to hand this over to ExpressionStatement or create a
+	// VariableDeclarationStatement out of it.
+
+	vector<ASTPointer<PrimaryExpression>> path;
+	bool startedWithElementary = false;
+	if (m_scanner->currentToken() == Token::Identifier)
+		path.push_back(parseIdentifier());
+	else
+	{
+		startedWithElementary = true;
+		path.push_back(ASTNodeFactory(*this).createNode<ElementaryTypeNameExpression>(m_scanner->currentToken()));
+		m_scanner->next();
+	}
+	while (!startedWithElementary && m_scanner->currentToken() == Token::Period)
+	{
+		m_scanner->next();
+		path.push_back(parseIdentifier());
+	}
+	vector<pair<ASTPointer<Expression>, SourceLocation>> indices;
+	while (m_scanner->currentToken() == Token::LBrack)
+	{
+		expectToken(Token::LBrack);
+		ASTPointer<Expression> index;
+		if (m_scanner->currentToken() != Token::RBrack)
+			index = parseExpression();
+		// Each index access spans from the start of the path up to its closing bracket.
+		SourceLocation indexLocation = path.front()->location();
+		indexLocation.end = endPosition();
+		indices.push_back(make_pair(index, indexLocation));
+		expectToken(Token::RBrack);
+	}
+
+	// A following name or location specifier means the prefix was a type name.
+	if (m_scanner->currentToken() == Token::Identifier || Token::isLocationSpecifier(m_scanner->currentToken()))
+		return parseVariableDeclarationStatement(typeNameIndexAccessStructure(path, indices));
+	else
+		return parseExpressionStatement(expressionFromIndexAccessStructure(path, indices));
+}
+
+/// Parses a variable declaration statement, either a single declaration or the
+/// multi-variable form `var (a, b, ...)`, each with an optional `= expression`.
+/// @param _lookAheadArrayType a type name already parsed by the caller; if set,
+/// the statement starts at that type's location and the multi-variable form is
+/// not considered.
+ASTPointer<VariableDeclarationStatement> Parser::parseVariableDeclarationStatement(
+	ASTPointer<TypeName> const& _lookAheadArrayType
+)
+{
+	ASTNodeFactory nodeFactory(*this);
+	if (_lookAheadArrayType)
+		nodeFactory.setLocation(_lookAheadArrayType->location());
+	vector<ASTPointer<VariableDeclaration>> variables;
+	ASTPointer<Expression> value;
+	if (
+		!_lookAheadArrayType &&
+		m_scanner->currentToken() == Token::Var &&
+		m_scanner->peekNextToken() == Token::LParen
+	)
+	{
+		// Parse `var (a, b, ,, c) = ...` into a single VariableDeclarationStatement with multiple variables.
+		m_scanner->next();
+		m_scanner->next();
+		if (m_scanner->currentToken() != Token::RParen)
+			while (true)
+			{
+				// Stays null for an omitted component.
+				ASTPointer<VariableDeclaration> var;
+				if (
+					m_scanner->currentToken() != Token::Comma &&
+					m_scanner->currentToken() != Token::RParen
+				)
+				{
+					ASTNodeFactory varDeclNodeFactory(*this);
+					varDeclNodeFactory.markEndPosition();
+					ASTPointer<ASTString> name = expectIdentifierToken();
+					var = varDeclNodeFactory.createNode<VariableDeclaration>(
+						ASTPointer<TypeName>(),
+						name,
+						ASTPointer<Expression>(),
+						VariableDeclaration::Visibility::Default
+					);
+				}
+				variables.push_back(var);
+				if (m_scanner->currentToken() == Token::RParen)
+					break;
+				else
+					expectToken(Token::Comma);
+			}
+		nodeFactory.markEndPosition();
+		m_scanner->next();
+	}
+	else
+	{
+		VarDeclParserOptions options;
+		options.allowVar = true;
+		options.allowLocationSpecifier = true;
+		variables.push_back(parseVariableDeclaration(options, _lookAheadArrayType));
+	}
+	if (m_scanner->currentToken() == Token::Assign)
+	{
+		m_scanner->next();
+		value = parseExpression();
+		nodeFactory.setEndPositionFromNode(value);
+	}
+	return nodeFactory.createNode<VariableDeclarationStatement>(variables, value);
+}
+
+/// Parses an expression and wraps it into an ExpressionStatement that has the
+/// same location as the expression.
+/// @param _lookAheadIndexAccessStructure an already parsed prefix of the
+/// expression, passed on to parseExpression().
+ASTPointer<ExpressionStatement> Parser::parseExpressionStatement(
+	ASTPointer<Expression> const& _lookAheadIndexAccessStructure
+)
+{
+	ASTPointer<Expression> expression = parseExpression(_lookAheadIndexAccessStructure);
+	return ASTNodeFactory(*this, expression).createNode<ExpressionStatement>(expression);
+}
+
+/// Parses an expression: a binary expression optionally followed by an assignment
+/// operator and a further expression as right-hand side (parsed recursively).
+/// @param _lookAheadIndexAccessStructure an already parsed prefix of the expression.
+ASTPointer<Expression> Parser::parseExpression(
+	ASTPointer<Expression> const& _lookAheadIndexAccessStructure
+)
+{
+	// NOTE(review): 4 is presumably the lowest precedence of a non-assignment binary
+	// operator - confirm against Token::precedence().
+	ASTPointer<Expression> expression = parseBinaryExpression(4, _lookAheadIndexAccessStructure);
+	if (!Token::isAssignmentOp(m_scanner->currentToken()))
+		return expression;
+	Token::Value assignmentOperator = expectAssignmentOperator();
+	ASTPointer<Expression> rightHandSide = parseExpression();
+	ASTNodeFactory nodeFactory(*this, expression);
+	nodeFactory.setEndPositionFromNode(rightHandSide);
+	return nodeFactory.createNode<Assignment>(expression, assignmentOperator, rightHandSide);
+}
+
+/// Parses a chain of binary operations whose operators have at least the
+/// precedence _minPrecedence.
+/// Starting at the precedence of the current token and descending, operators of
+/// equal precedence are folded in the order they appear; each right operand is
+/// parsed with a minimum precedence one higher than that of its operator.
+/// @param _lookAheadIndexAccessStructure an already parsed prefix of the first operand.
+ASTPointer<Expression> Parser::parseBinaryExpression(
+	int _minPrecedence,
+	ASTPointer<Expression> const& _lookAheadIndexAccessStructure
+)
+{
+	ASTPointer<Expression> expression = parseUnaryExpression(_lookAheadIndexAccessStructure);
+	ASTNodeFactory nodeFactory(*this, expression);
+	int precedence = Token::precedence(m_scanner->currentToken());
+	for (; precedence >= _minPrecedence; --precedence)
+		while (Token::precedence(m_scanner->currentToken()) == precedence)
+		{
+			Token::Value op = m_scanner->currentToken();
+			m_scanner->next();
+			ASTPointer<Expression> right = parseBinaryExpression(precedence + 1);
+			nodeFactory.setEndPositionFromNode(right);
+			expression = nodeFactory.createNode<BinaryOperation>(expression, op, right);
+		}
+	return expression;
+}
+
+/// Parses a prefix unary operation, or a left-hand-side expression optionally
+/// followed by one postfix count operator.
+/// @param _lookAheadIndexAccessStructure an already parsed prefix of the
+/// expression; if set, no prefix operator is considered.
+ASTPointer<Expression> Parser::parseUnaryExpression(
+	ASTPointer<Expression> const& _lookAheadIndexAccessStructure
+)
+{
+	ASTNodeFactory nodeFactory = _lookAheadIndexAccessStructure ?
+		ASTNodeFactory(*this, _lookAheadIndexAccessStructure) : ASTNodeFactory(*this);
+	Token::Value token = m_scanner->currentToken();
+	if (!_lookAheadIndexAccessStructure && (Token::isUnaryOp(token) || Token::isCountOp(token)))
+	{
+		// prefix expression
+		m_scanner->next();
+		ASTPointer<Expression> subExpression = parseUnaryExpression();
+		nodeFactory.setEndPositionFromNode(subExpression);
+		return nodeFactory.createNode<UnaryOperation>(token, subExpression, true);
+	}
+	else
+	{
+		// potential postfix expression
+		ASTPointer<Expression> subExpression = parseLeftHandSideExpression(_lookAheadIndexAccessStructure);
+		token = m_scanner->currentToken();
+		if (!Token::isCountOp(token))
+			return subExpression;
+		nodeFactory.markEndPosition();
+		m_scanner->next();
+		return nodeFactory.createNode<UnaryOperation>(token, subExpression, false);
+	}
+}
+
+/// Parses a `new` expression or a primary expression, followed by any number of
+/// index accesses `[...]`, member accesses `.name` and function calls `(...)`.
+/// @param _lookAheadIndexAccessStructure an already parsed expression to which
+/// the postfixes are applied instead of parsing a new base expression.
+ASTPointer<Expression> Parser::parseLeftHandSideExpression(
+	ASTPointer<Expression> const& _lookAheadIndexAccessStructure
+)
+{
+	ASTNodeFactory nodeFactory = _lookAheadIndexAccessStructure ?
+		ASTNodeFactory(*this, _lookAheadIndexAccessStructure) : ASTNodeFactory(*this);
+
+	ASTPointer<Expression> expression;
+	if (_lookAheadIndexAccessStructure)
+		expression = _lookAheadIndexAccessStructure;
+	else if (m_scanner->currentToken() == Token::New)
+	{
+		expectToken(Token::New);
+		ASTPointer<Identifier> contractName(parseIdentifier());
+		nodeFactory.setEndPositionFromNode(contractName);
+		expression = nodeFactory.createNode<NewExpression>(contractName);
+	}
+	else
+		expression = parsePrimaryExpression();
+
+	// Every postfix wraps the expression parsed so far; the shared factory keeps the
+	// start position and only its end position is moved.
+	while (true)
+	{
+		switch (m_scanner->currentToken())
+		{
+		case Token::LBrack:
+		{
+			m_scanner->next();
+			ASTPointer<Expression> index;
+			if (m_scanner->currentToken() != Token::RBrack)
+				index = parseExpression();
+			nodeFactory.markEndPosition();
+			expectToken(Token::RBrack);
+			expression = nodeFactory.createNode<IndexAccess>(expression, index);
+		}
+		break;
+		case Token::Period:
+		{
+			m_scanner->next();
+			nodeFactory.markEndPosition();
+			expression = nodeFactory.createNode<MemberAccess>(expression, expectIdentifierToken());
+		}
+		break;
+		case Token::LParen:
+		{
+			m_scanner->next();
+			vector<ASTPointer<Expression>> arguments;
+			vector<ASTPointer<ASTString>> names;
+			std::tie(arguments, names) = parseFunctionCallArguments();
+			nodeFactory.markEndPosition();
+			expectToken(Token::RParen);
+			expression = nodeFactory.createNode<FunctionCall>(expression, arguments, names);
+		}
+		break;
+		default:
+			return expression;
+		}
+	}
+}
+
+/// Parses a primary expression: a boolean, number or string literal (numbers may
+/// carry an ether or time subdenomination), an identifier, a tuple / parenthesised
+/// expression or an elementary type name (used for casts).
+/// Raises a fatal parser error for any other token.
+ASTPointer<Expression> Parser::parsePrimaryExpression()
+{
+	ASTNodeFactory nodeFactory(*this);
+	Token::Value token = m_scanner->currentToken();
+	ASTPointer<Expression> expression;
+	switch (token)
+	{
+	case Token::TrueLiteral:
+	case Token::FalseLiteral:
+		// Fix the end position while the literal is still the current token. Otherwise
+		// createNode() would only read it after getLiteralAndAdvance() has been evaluated
+		// as an argument. The string literal and identifier cases below do the same.
+		nodeFactory.markEndPosition();
+		expression = nodeFactory.createNode<Literal>(token, getLiteralAndAdvance());
+		break;
+	case Token::Number:
+		if (Token::isEtherSubdenomination(m_scanner->peekNextToken()))
+		{
+			ASTPointer<ASTString> literal = getLiteralAndAdvance();
+			// The subdenomination token is part of the literal's location.
+			nodeFactory.markEndPosition();
+			Literal::SubDenomination subdenomination = static_cast<Literal::SubDenomination>(m_scanner->currentToken());
+			m_scanner->next();
+			expression = nodeFactory.createNode<Literal>(token, literal, subdenomination);
+			break;
+		}
+		if (Token::isTimeSubdenomination(m_scanner->peekNextToken()))
+		{
+			ASTPointer<ASTString> literal = getLiteralAndAdvance();
+			nodeFactory.markEndPosition();
+			Literal::SubDenomination subdenomination = static_cast<Literal::SubDenomination>(m_scanner->currentToken());
+			m_scanner->next();
+			expression = nodeFactory.createNode<Literal>(token, literal, subdenomination);
+			break;
+		}
+		// fall-through
+	case Token::StringLiteral:
+		nodeFactory.markEndPosition();
+		expression = nodeFactory.createNode<Literal>(token, getLiteralAndAdvance());
+		break;
+	case Token::Identifier:
+		nodeFactory.markEndPosition();
+		expression = nodeFactory.createNode<Identifier>(getLiteralAndAdvance());
+		break;
+	case Token::LParen:
+	{
+		// Tuple or parenthesized expression.
+		// Special cases: () is empty tuple type, (x) is not a real tuple, (x,) is one-dimensional tuple
+		m_scanner->next();
+		vector<ASTPointer<Expression>> components;
+		if (m_scanner->currentToken() != Token::RParen)
+			while (true)
+			{
+				if (m_scanner->currentToken() != Token::Comma && m_scanner->currentToken() != Token::RParen)
+					components.push_back(parseExpression());
+				else
+					// Omitted component, e.g. the middle one of `(a, , b)`.
+					components.push_back(ASTPointer<Expression>());
+				if (m_scanner->currentToken() == Token::RParen)
+					break;
+				// Components have to be separated by commas. Any other token is reported
+				// as an error instead of silently starting another component, which
+				// would accept input like `(a b)`.
+				expectToken(Token::Comma);
+			}
+		nodeFactory.markEndPosition();
+		expectToken(Token::RParen);
+		return nodeFactory.createNode<TupleExpression>(components);
+	}
+	default:
+		if (Token::isElementaryTypeName(token))
+		{
+			// used for casts
+			expression = nodeFactory.createNode<ElementaryTypeNameExpression>(token);
+			m_scanner->next();
+		}
+		else
+			fatalParserError(std::string("Expected primary expression."));
+		break;
+	}
+	return expression;
+}
+
+vector<ASTPointer<Expression>> Parser::parseFunctionCallListArguments() +{ + vector<ASTPointer<Expression>> arguments; + if (m_scanner->currentToken() != Token::RParen) + { + arguments.push_back(parseExpression()); + while (m_scanner->currentToken() != Token::RParen) + { + expectToken(Token::Comma); + arguments.push_back(parseExpression()); + } + } + return arguments; +} + +pair<vector<ASTPointer<Expression>>, vector<ASTPointer<ASTString>>> Parser::parseFunctionCallArguments() +{ + pair<vector<ASTPointer<Expression>>, vector<ASTPointer<ASTString>>> ret; + Token::Value token = m_scanner->currentToken(); + if (token == Token::LBrace) + { + // call({arg1 : 1, arg2 : 2 }) + expectToken(Token::LBrace); + while (m_scanner->currentToken() != Token::RBrace) + { + ret.second.push_back(expectIdentifierToken()); + expectToken(Token::Colon); + ret.first.push_back(parseExpression()); + + if (m_scanner->currentToken() == Token::Comma) + expectToken(Token::Comma); + else + break; + } + expectToken(Token::RBrace); + } + else + ret.first = parseFunctionCallListArguments(); + return ret; +} + +Parser::LookAheadInfo Parser::peekStatementType() const +{ + // Distinguish between variable declaration (and potentially assignment) and expression statement + // (which include assignments to other expressions and pre-declared variables). + // We have a variable declaration if we get a keyword that specifies a type name. + // If it is an identifier or an elementary type name followed by an identifier, we also have + // a variable declaration. + // If we get an identifier followed by a "[" or ".", it can be both ("lib.type[9] a;" or "variable.el[9] = 7;"). + // In all other cases, we have an expression statement. 
+ Token::Value token(m_scanner->currentToken()); + bool mightBeTypeName = (Token::isElementaryTypeName(token) || token == Token::Identifier); + + if (token == Token::Mapping || token == Token::Var) + return LookAheadInfo::VariableDeclarationStatement; + if (mightBeTypeName) + { + Token::Value next = m_scanner->peekNextToken(); + if (next == Token::Identifier || Token::isLocationSpecifier(next)) + return LookAheadInfo::VariableDeclarationStatement; + if (next == Token::LBrack || next == Token::Period) + return LookAheadInfo::IndexAccessStructure; + } + return LookAheadInfo::ExpressionStatement; +} + +ASTPointer<TypeName> Parser::typeNameIndexAccessStructure( + vector<ASTPointer<PrimaryExpression>> const& _path, + vector<pair<ASTPointer<Expression>, SourceLocation>> const& _indices +) +{ + solAssert(!_path.empty(), ""); + ASTNodeFactory nodeFactory(*this); + SourceLocation location = _path.front()->location(); + location.end = _path.back()->location().end; + nodeFactory.setLocation(location); + + ASTPointer<TypeName> type; + if (auto typeName = dynamic_cast<ElementaryTypeNameExpression const*>(_path.front().get())) + { + solAssert(_path.size() == 1, ""); + type = nodeFactory.createNode<ElementaryTypeName>(typeName->typeToken()); + } + else + { + vector<ASTString> path; + for (auto const& el: _path) + path.push_back(dynamic_cast<Identifier const&>(*el).name()); + type = nodeFactory.createNode<UserDefinedTypeName>(path); + } + for (auto const& lengthExpression: _indices) + { + nodeFactory.setLocation(lengthExpression.second); + type = nodeFactory.createNode<ArrayTypeName>(type, lengthExpression.first); + } + return type; +} + +ASTPointer<Expression> Parser::expressionFromIndexAccessStructure( + vector<ASTPointer<PrimaryExpression>> const& _path, + vector<pair<ASTPointer<Expression>, SourceLocation>> const& _indices +) +{ + solAssert(!_path.empty(), ""); + ASTNodeFactory nodeFactory(*this, _path.front()); + ASTPointer<Expression> expression(_path.front()); + for (size_t 
i = 1; i < _path.size(); ++i) + { + SourceLocation location(_path.front()->location()); + location.end = _path[i]->location().end; + nodeFactory.setLocation(location); + Identifier const& identifier = dynamic_cast<Identifier const&>(*_path[i]); + expression = nodeFactory.createNode<MemberAccess>( + expression, + make_shared<ASTString>(identifier.name()) + ); + } + for (auto const& index: _indices) + { + nodeFactory.setLocation(index.second); + expression = nodeFactory.createNode<IndexAccess>(expression, index.first); + } + return expression; +} + +void Parser::expectToken(Token::Value _value) +{ + if (m_scanner->currentToken() != _value) + fatalParserError(std::string(string("Expected token ") + string(Token::name(_value)))); + m_scanner->next(); +} + +Token::Value Parser::expectAssignmentOperator() +{ + Token::Value op = m_scanner->currentToken(); + if (!Token::isAssignmentOp(op)) + fatalParserError(std::string("Expected assignment operator")); + m_scanner->next(); + return op; +} + +ASTPointer<ASTString> Parser::expectIdentifierToken() +{ + if (m_scanner->currentToken() != Token::Identifier) + fatalParserError(std::string("Expected identifier")); + return getLiteralAndAdvance(); +} + +ASTPointer<ASTString> Parser::getLiteralAndAdvance() +{ + ASTPointer<ASTString> identifier = make_shared<ASTString>(m_scanner->currentLiteral()); + m_scanner->next(); + return identifier; +} + +ASTPointer<ParameterList> Parser::createEmptyParameterList() +{ + ASTNodeFactory nodeFactory(*this); + nodeFactory.setLocationEmpty(); + return nodeFactory.createNode<ParameterList>(vector<ASTPointer<VariableDeclaration>>()); +} + +void Parser::parserError(string const& _description) +{ + auto err = make_shared<Error>(Error::Type::ParserError); + *err << + errinfo_sourceLocation(SourceLocation(position(), position(), sourceName())) << + errinfo_comment(_description); + + m_errors.push_back(err); +} + +void Parser::fatalParserError(string const& _description) +{ + parserError(_description); + 
BOOST_THROW_EXCEPTION(FatalError()); +} + +} +} diff --git a/libsolidity/parsing/Parser.h b/libsolidity/parsing/Parser.h new file mode 100644 index 00000000..5e226ba5 --- /dev/null +++ b/libsolidity/parsing/Parser.h @@ -0,0 +1,164 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @author Christian <c@ethdev.com> + * @date 2014 + * Solidity parser. + */ + +#pragma once + +#include "libsolidity/ast/AST.h" + +namespace dev +{ +namespace solidity +{ + +class Scanner; + +class Parser +{ +public: + Parser(ErrorList& errors): m_errors(errors){}; + + ASTPointer<SourceUnit> parse(std::shared_ptr<Scanner> const& _scanner); + std::shared_ptr<std::string const> const& sourceName() const; + +private: + class ASTNodeFactory; + + /// Start position of the current token + int position() const; + /// End position of the current token + int endPosition() const; + + struct VarDeclParserOptions + { + VarDeclParserOptions() {} + bool allowVar = false; + bool isStateVariable = false; + bool allowIndexed = false; + bool allowEmptyName = false; + bool allowInitialValue = false; + bool allowLocationSpecifier = false; + }; + + ///@{ + ///@name Parsing functions for the AST nodes + ASTPointer<ImportDirective> parseImportDirective(); + ASTPointer<ContractDefinition> parseContractDefinition(bool _isLibrary); + ASTPointer<InheritanceSpecifier> 
parseInheritanceSpecifier(); + Declaration::Visibility parseVisibilitySpecifier(Token::Value _token); + ASTPointer<FunctionDefinition> parseFunctionDefinition(ASTString const* _contractName); + ASTPointer<StructDefinition> parseStructDefinition(); + ASTPointer<EnumDefinition> parseEnumDefinition(); + ASTPointer<EnumValue> parseEnumValue(); + ASTPointer<VariableDeclaration> parseVariableDeclaration( + VarDeclParserOptions const& _options = VarDeclParserOptions(), + ASTPointer<TypeName> const& _lookAheadArrayType = ASTPointer<TypeName>() + ); + ASTPointer<ModifierDefinition> parseModifierDefinition(); + ASTPointer<EventDefinition> parseEventDefinition(); + ASTPointer<ModifierInvocation> parseModifierInvocation(); + ASTPointer<Identifier> parseIdentifier(); + ASTPointer<TypeName> parseTypeName(bool _allowVar); + ASTPointer<Mapping> parseMapping(); + ASTPointer<ParameterList> parseParameterList( + VarDeclParserOptions const& _options, + bool _allowEmpty = true + ); + ASTPointer<Block> parseBlock(); + ASTPointer<Statement> parseStatement(); + ASTPointer<IfStatement> parseIfStatement(); + ASTPointer<WhileStatement> parseWhileStatement(); + ASTPointer<ForStatement> parseForStatement(); + /// A "simple statement" can be a variable declaration statement or an expression statement. 
+ ASTPointer<Statement> parseSimpleStatement(); + ASTPointer<VariableDeclarationStatement> parseVariableDeclarationStatement( + ASTPointer<TypeName> const& _lookAheadArrayType = ASTPointer<TypeName>() + ); + ASTPointer<ExpressionStatement> parseExpressionStatement( + ASTPointer<Expression> const& _lookAheadIndexAccessStructure = ASTPointer<Expression>() + ); + ASTPointer<Expression> parseExpression( + ASTPointer<Expression> const& _lookAheadIndexAccessStructure = ASTPointer<Expression>() + ); + ASTPointer<Expression> parseBinaryExpression(int _minPrecedence = 4, + ASTPointer<Expression> const& _lookAheadIndexAccessStructure = ASTPointer<Expression>() + ); + ASTPointer<Expression> parseUnaryExpression( + ASTPointer<Expression> const& _lookAheadIndexAccessStructure = ASTPointer<Expression>() + ); + ASTPointer<Expression> parseLeftHandSideExpression( + ASTPointer<Expression> const& _lookAheadIndexAccessStructure = ASTPointer<Expression>() + ); + ASTPointer<Expression> parsePrimaryExpression(); + std::vector<ASTPointer<Expression>> parseFunctionCallListArguments(); + std::pair<std::vector<ASTPointer<Expression>>, std::vector<ASTPointer<ASTString>>> parseFunctionCallArguments(); + ///@} + + ///@{ + ///@name Helper functions + + /// Used as return value of @see peekStatementType. + enum class LookAheadInfo + { + IndexAccessStructure, VariableDeclarationStatement, ExpressionStatement + }; + + /// Performs limited look-ahead to distinguish between variable declaration and expression statement. + /// For source code of the form "a[][8]" ("IndexAccessStructure"), this is not possible to + /// decide with constant look-ahead. + LookAheadInfo peekStatementType() const; + /// Returns a typename parsed in look-ahead fashion from something like "a.b[8][2**70]". 
+ ASTPointer<TypeName> typeNameIndexAccessStructure( + std::vector<ASTPointer<PrimaryExpression>> const& _path, + std::vector<std::pair<ASTPointer<Expression>, SourceLocation>> const& _indices + ); + /// Returns an expression parsed in look-ahead fashion from something like "a.b[8][2**70]". + ASTPointer<Expression> expressionFromIndexAccessStructure( + std::vector<ASTPointer<PrimaryExpression>> const& _path, + std::vector<std::pair<ASTPointer<Expression>, SourceLocation>> const& _indices + ); + /// If current token value is not _value, throw exception otherwise advance token. + void expectToken(Token::Value _value); + Token::Value expectAssignmentOperator(); + ASTPointer<ASTString> expectIdentifierToken(); + ASTPointer<ASTString> getLiteralAndAdvance(); + ///@} + + /// Creates an empty ParameterList at the current location (used if parameters can be omitted). + ASTPointer<ParameterList> createEmptyParameterList(); + + /// Creates a @ref ParserError and annotates it with the current position and the + /// given @a _description. + void parserError(std::string const& _description); + + /// Creates a @ref ParserError and annotates it with the current position and the + /// given @a _description. Throws the FatalError. + void fatalParserError(std::string const& _description); + + std::shared_ptr<Scanner> m_scanner; + /// Flag that signifies whether '_' is parsed as a PlaceholderStatement or a regular identifier. + bool m_insideModifier = false; + /// The reference to the list of errors and warning to add errors/warnings during parsing + ErrorList& m_errors; +}; + +} +} diff --git a/libsolidity/parsing/Scanner.cpp b/libsolidity/parsing/Scanner.cpp new file mode 100644 index 00000000..fe0807d5 --- /dev/null +++ b/libsolidity/parsing/Scanner.cpp @@ -0,0 +1,771 @@ +/* + This file is part of cpp-ethereum. 
+ + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. + + This file is derived from the file "scanner.cc", which was part of the + V8 project. The original copyright header follows: + + Copyright 2006-2012, the V8 project authors. All rights reserved. + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +/** + * @author Christian <c@ethdev.com> + * @date 2014 + * Solidity scanner. + */ + +#include <algorithm> +#include <tuple> +#include <libsolidity/interface/Utils.h> +#include <libsolidity/parsing/Scanner.h> + +using namespace std; + +namespace dev +{ +namespace solidity +{ + +namespace +{ +bool isDecimalDigit(char c) +{ + return '0' <= c && c <= '9'; +} +bool isHexDigit(char c) +{ + return isDecimalDigit(c) + || ('a' <= c && c <= 'f') + || ('A' <= c && c <= 'F'); +} +bool isLineTerminator(char c) +{ + return c == '\n'; +} +bool isWhiteSpace(char c) +{ + return c == ' ' || c == '\n' || c == '\t' || c == '\r'; +} +bool isIdentifierStart(char c) +{ + return c == '_' || c == '$' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'); +} +bool isIdentifierPart(char c) +{ + return isIdentifierStart(c) || isDecimalDigit(c); +} + +int hexValue(char c) +{ + if (c >= '0' && c <= '9') + return c - '0'; + else if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + else return -1; +} +} // end anonymous namespace + + + +/// Scoped helper for literal recording. Automatically drops the literal +/// if aborting the scanning before it's complete. 
+enum LiteralType { + LITERAL_TYPE_STRING, + LITERAL_TYPE_NUMBER, // not really different from string type in behaviour + LITERAL_TYPE_COMMENT +}; + +class LiteralScope +{ +public: + explicit LiteralScope(Scanner* _self, enum LiteralType _type): m_type(_type) + , m_scanner(_self) + , m_complete(false) + { + if (_type == LITERAL_TYPE_COMMENT) + m_scanner->m_nextSkippedComment.literal.clear(); + else + m_scanner->m_nextToken.literal.clear(); + } + ~LiteralScope() + { + if (!m_complete) + { + if (m_type == LITERAL_TYPE_COMMENT) + m_scanner->m_nextSkippedComment.literal.clear(); + else + m_scanner->m_nextToken.literal.clear(); + } + } + void complete() { m_complete = true; } + +private: + enum LiteralType m_type; + Scanner* m_scanner; + bool m_complete; +}; // end of LiteralScope class + + +void Scanner::reset(CharStream const& _source, string const& _sourceName) +{ + m_source = _source; + m_sourceName = make_shared<string const>(_sourceName); + reset(); +} + +void Scanner::reset() +{ + m_source.reset(); + m_char = m_source.get(); + skipWhitespace(); + scanToken(); + next(); +} + +bool Scanner::scanHexByte(char& o_scannedByte) +{ + char x = 0; + for (int i = 0; i < 2; i++) + { + int d = hexValue(m_char); + if (d < 0) + { + rollback(i); + return false; + } + x = x * 16 + d; + advance(); + } + o_scannedByte = x; + return true; +} + + +// Ensure that tokens can be stored in a byte. +BOOST_STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); + +Token::Value Scanner::next() +{ + m_currentToken = m_nextToken; + m_skippedComment = m_nextSkippedComment; + scanToken(); + + return m_currentToken.token; +} + +Token::Value Scanner::selectToken(char _next, Token::Value _then, Token::Value _else) +{ + advance(); + if (m_char == _next) + return selectToken(_then); + else + return _else; +} + +bool Scanner::skipWhitespace() +{ + int const startPosition = sourcePos(); + while (isWhiteSpace(m_char)) + advance(); + // Return whether or not we skipped any characters. 
+ return sourcePos() != startPosition; +} + +bool Scanner::skipWhitespaceExceptLF() +{ + int const startPosition = sourcePos(); + while (isWhiteSpace(m_char) && !isLineTerminator(m_char)) + advance(); + // Return whether or not we skipped any characters. + return sourcePos() != startPosition; +} + +Token::Value Scanner::skipSingleLineComment() +{ + // The line terminator at the end of the line is not considered + // to be part of the single-line comment; it is recognized + // separately by the lexical grammar and becomes part of the + // stream of input elements for the syntactic grammar + while (!isLineTerminator(m_char)) + if (!advance()) break; + + return Token::Whitespace; +} + +Token::Value Scanner::scanSingleLineDocComment() +{ + LiteralScope literal(this, LITERAL_TYPE_COMMENT); + advance(); //consume the last '/' at /// + skipWhitespaceExceptLF(); + while (!isSourcePastEndOfInput()) + { + if (isLineTerminator(m_char)) + { + // check if next line is also a documentation comment + skipWhitespace(); + if (!m_source.isPastEndOfInput(3) && + m_source.get(0) == '/' && + m_source.get(1) == '/' && + m_source.get(2) == '/') + { + addCommentLiteralChar('\n'); + m_char = m_source.advanceAndGet(3); + } + else + break; // next line is not a documentation comment, we are done + + } + addCommentLiteralChar(m_char); + advance(); + } + literal.complete(); + return Token::CommentLiteral; +} + +Token::Value Scanner::skipMultiLineComment() +{ + advance(); + while (!isSourcePastEndOfInput()) + { + char ch = m_char; + advance(); + + // If we have reached the end of the multi-line comment, we + // consume the '/' and insert a whitespace. This way all + // multi-line comments are treated as whitespace. + if (ch == '*' && m_char == '/') + { + m_char = ' '; + return Token::Whitespace; + } + } + // Unterminated multi-line comment. 
+ return Token::Illegal; +} + +Token::Value Scanner::scanMultiLineDocComment() +{ + LiteralScope literal(this, LITERAL_TYPE_COMMENT); + bool endFound = false; + bool charsAdded = false; + + while (!isSourcePastEndOfInput()) + { + //handle newlines in multline comments + if (isLineTerminator(m_char)) + { + skipWhitespace(); + if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) != '/') + { // skip first '*' in subsequent lines + if (charsAdded) + addCommentLiteralChar('\n'); + m_char = m_source.advanceAndGet(2); + } + else if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) == '/') + { // if after newline the comment ends, don't insert the newline + m_char = m_source.advanceAndGet(2); + endFound = true; + break; + } + else if (charsAdded) + addCommentLiteralChar('\n'); + } + + if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) == '/') + { + m_char = m_source.advanceAndGet(2); + endFound = true; + break; + } + addCommentLiteralChar(m_char); + charsAdded = true; + advance(); + } + literal.complete(); + if (!endFound) + return Token::Illegal; + else + return Token::CommentLiteral; +} + +Token::Value Scanner::scanSlash() +{ + int firstSlashPosition = sourcePos(); + advance(); + if (m_char == '/') + { + if (!advance()) /* double slash comment directly before EOS */ + return Token::Whitespace; + else if (m_char == '/') + { + // doxygen style /// comment + Token::Value comment; + m_nextSkippedComment.location.start = firstSlashPosition; + comment = scanSingleLineDocComment(); + m_nextSkippedComment.location.end = sourcePos(); + m_nextSkippedComment.token = comment; + return Token::Whitespace; + } + else + return skipSingleLineComment(); + } + else if (m_char == '*') + { + // doxygen style /** natspec comment + if (!advance()) /* slash star comment before EOS */ + return Token::Whitespace; + else if (m_char == '*') + { + advance(); //consume the last '*' at /** + skipWhitespaceExceptLF(); + + 
// special case of a closed normal multiline comment + if (!m_source.isPastEndOfInput() && m_source.get(0) == '/') + advance(); //skip the closing slash + else // we actually have a multiline documentation comment + { + Token::Value comment; + m_nextSkippedComment.location.start = firstSlashPosition; + comment = scanMultiLineDocComment(); + m_nextSkippedComment.location.end = sourcePos(); + m_nextSkippedComment.token = comment; + } + return Token::Whitespace; + } + else + return skipMultiLineComment(); + } + else if (m_char == '=') + return selectToken(Token::AssignDiv); + else + return Token::Div; +} + +void Scanner::scanToken() +{ + m_nextToken.literal.clear(); + m_nextSkippedComment.literal.clear(); + Token::Value token; + do + { + // Remember the position of the next token + m_nextToken.location.start = sourcePos(); + switch (m_char) + { + case '\n': // fall-through + case ' ': + case '\t': + token = selectToken(Token::Whitespace); + break; + case '"': + case '\'': + token = scanString(); + break; + case '<': + // < <= << <<= + advance(); + if (m_char == '=') + token = selectToken(Token::LessThanOrEqual); + else if (m_char == '<') + token = selectToken('=', Token::AssignShl, Token::SHL); + else + token = Token::LessThan; + break; + case '>': + // > >= >> >>= >>> >>>= + advance(); + if (m_char == '=') + token = selectToken(Token::GreaterThanOrEqual); + else if (m_char == '>') + { + // >> >>= >>> >>>= + advance(); + if (m_char == '=') + token = selectToken(Token::AssignSar); + else if (m_char == '>') + token = selectToken('=', Token::AssignShr, Token::SHR); + else + token = Token::SAR; + } + else + token = Token::GreaterThan; + break; + case '=': + // = == => + advance(); + if (m_char == '=') + token = selectToken(Token::Equal); + else if (m_char == '>') + token = selectToken(Token::Arrow); + else + token = Token::Assign; + break; + case '!': + // ! 
!= + advance(); + if (m_char == '=') + token = selectToken(Token::NotEqual); + else + token = Token::Not; + break; + case '+': + // + ++ += + advance(); + if (m_char == '+') + token = selectToken(Token::Inc); + else if (m_char == '=') + token = selectToken(Token::AssignAdd); + else + token = Token::Add; + break; + case '-': + // - -- -= + advance(); + if (m_char == '-') + token = selectToken(Token::Dec); + else if (m_char == '=') + token = selectToken(Token::AssignSub); + else + token = Token::Sub; + break; + case '*': + // * ** *= + advance(); + if (m_char == '*') + token = selectToken(Token::Exp); + else if (m_char == '=') + token = selectToken(Token::AssignMul); + else + token = Token::Mul; + break; + case '%': + // % %= + token = selectToken('=', Token::AssignMod, Token::Mod); + break; + case '/': + // / // /* /= + token = scanSlash(); + break; + case '&': + // & && &= + advance(); + if (m_char == '&') + token = selectToken(Token::And); + else if (m_char == '=') + token = selectToken(Token::AssignBitAnd); + else + token = Token::BitAnd; + break; + case '|': + // | || |= + advance(); + if (m_char == '|') + token = selectToken(Token::Or); + else if (m_char == '=') + token = selectToken(Token::AssignBitOr); + else + token = Token::BitOr; + break; + case '^': + // ^ ^= + token = selectToken('=', Token::AssignBitXor, Token::BitXor); + break; + case '.': + // . 
Number + advance(); + if (isDecimalDigit(m_char)) + token = scanNumber('.'); + else + token = Token::Period; + break; + case ':': + token = selectToken(Token::Colon); + break; + case ';': + token = selectToken(Token::Semicolon); + break; + case ',': + token = selectToken(Token::Comma); + break; + case '(': + token = selectToken(Token::LParen); + break; + case ')': + token = selectToken(Token::RParen); + break; + case '[': + token = selectToken(Token::LBrack); + break; + case ']': + token = selectToken(Token::RBrack); + break; + case '{': + token = selectToken(Token::LBrace); + break; + case '}': + token = selectToken(Token::RBrace); + break; + case '?': + token = selectToken(Token::Conditional); + break; + case '~': + token = selectToken(Token::BitNot); + break; + default: + if (isIdentifierStart(m_char)) + token = scanIdentifierOrKeyword(); + else if (isDecimalDigit(m_char)) + token = scanNumber(); + else if (skipWhitespace()) + token = Token::Whitespace; + else if (isSourcePastEndOfInput()) + token = Token::EOS; + else + token = selectToken(Token::Illegal); + break; + } + // Continue scanning for tokens as long as we're just skipping + // whitespace. + } + while (token == Token::Whitespace); + m_nextToken.location.end = sourcePos(); + m_nextToken.token = token; +} + +bool Scanner::scanEscape() +{ + char c = m_char; + advance(); + // Skip escaped newlines. 
+ if (isLineTerminator(c)) + return true; + switch (c) + { + case '\'': // fall through + case '"': // fall through + case '\\': + break; + case 'b': + c = '\b'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'v': + c = '\v'; + break; + case 'x': + if (!scanHexByte(c)) + return false; + break; + } + + addLiteralChar(c); + return true; +} + +Token::Value Scanner::scanString() +{ + char const quote = m_char; + advance(); // consume quote + LiteralScope literal(this, LITERAL_TYPE_STRING); + while (m_char != quote && !isSourcePastEndOfInput() && !isLineTerminator(m_char)) + { + char c = m_char; + advance(); + if (c == '\\') + { + if (isSourcePastEndOfInput() || !scanEscape()) + return Token::Illegal; + } + else + addLiteralChar(c); + } + if (m_char != quote) + return Token::Illegal; + literal.complete(); + advance(); // consume quote + return Token::StringLiteral; +} + +void Scanner::scanDecimalDigits() +{ + while (isDecimalDigit(m_char)) + addLiteralCharAndAdvance(); +} + +Token::Value Scanner::scanNumber(char _charSeen) +{ + enum { DECIMAL, HEX, BINARY } kind = DECIMAL; + LiteralScope literal(this, LITERAL_TYPE_NUMBER); + if (_charSeen == '.') + { + // we have already seen a decimal point of the float + addLiteralChar('.'); + scanDecimalDigits(); // we know we have at least one digit + } + else + { + solAssert(_charSeen == 0, ""); + // if the first character is '0' we must check for octals and hex + if (m_char == '0') + { + addLiteralCharAndAdvance(); + // either 0, 0exxx, 0Exxx, 0.xxx or a hex number + if (m_char == 'x' || m_char == 'X') + { + // hex number + kind = HEX; + addLiteralCharAndAdvance(); + if (!isHexDigit(m_char)) + return Token::Illegal; // we must have at least one hex digit after 'x'/'X' + while (isHexDigit(m_char)) + addLiteralCharAndAdvance(); + } + } + // Parse decimal digits and allow trailing fractional part. 
+ if (kind == DECIMAL) + { + scanDecimalDigits(); // optional + if (m_char == '.') + { + addLiteralCharAndAdvance(); + scanDecimalDigits(); // optional + } + } + } + // scan exponent, if any + if (m_char == 'e' || m_char == 'E') + { + solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number"); + if (kind != DECIMAL) + return Token::Illegal; + // scan exponent + addLiteralCharAndAdvance(); + if (m_char == '+' || m_char == '-') + addLiteralCharAndAdvance(); + if (!isDecimalDigit(m_char)) + return Token::Illegal; // we must have at least one decimal digit after 'e'/'E' + scanDecimalDigits(); + } + // The source character immediately following a numeric literal must + // not be an identifier start or a decimal digit; see ECMA-262 + // section 7.8.3, page 17 (note that we read only one decimal digit + // if the value is 0). + if (isDecimalDigit(m_char) || isIdentifierStart(m_char)) + return Token::Illegal; + literal.complete(); + return Token::Number; +} + +Token::Value Scanner::scanIdentifierOrKeyword() +{ + solAssert(isIdentifierStart(m_char), ""); + LiteralScope literal(this, LITERAL_TYPE_STRING); + addLiteralCharAndAdvance(); + // Scan the rest of the identifier characters. 
+ while (isIdentifierPart(m_char)) + addLiteralCharAndAdvance(); + literal.complete(); + return Token::fromIdentifierOrKeyword(m_nextToken.literal); +} + +char CharStream::advanceAndGet(size_t _chars) +{ + if (isPastEndOfInput()) + return 0; + m_position += _chars; + if (isPastEndOfInput()) + return 0; + return m_source[m_position]; +} + +char CharStream::rollback(size_t _amount) +{ + solAssert(m_position >= _amount, ""); + m_position -= _amount; + return get(); +} + +string CharStream::lineAtPosition(int _position) const +{ + // if _position points to \n, it returns the line before the \n + using size_type = string::size_type; + size_type searchStart = min<size_type>(m_source.size(), _position); + if (searchStart > 0) + searchStart--; + size_type lineStart = m_source.rfind('\n', searchStart); + if (lineStart == string::npos) + lineStart = 0; + else + lineStart++; + return m_source.substr(lineStart, min(m_source.find('\n', lineStart), + m_source.size()) - lineStart); +} + +tuple<int, int> CharStream::translatePositionToLineColumn(int _position) const +{ + using size_type = string::size_type; + size_type searchPosition = min<size_type>(m_source.size(), _position); + int lineNumber = count(m_source.begin(), m_source.begin() + searchPosition, '\n'); + size_type lineStart; + if (searchPosition == 0) + lineStart = 0; + else + { + lineStart = m_source.rfind('\n', searchPosition - 1); + lineStart = lineStart == string::npos ? 0 : lineStart + 1; + } + return tuple<int, int>(lineNumber, searchPosition - lineStart); +} + + +} +} diff --git a/libsolidity/parsing/Scanner.h b/libsolidity/parsing/Scanner.h new file mode 100644 index 00000000..a1a5c9c1 --- /dev/null +++ b/libsolidity/parsing/Scanner.h @@ -0,0 +1,224 @@ +/* + This file is part of cpp-ethereum. 
+ + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. + + This file is derived from the file "scanner.h", which was part of the + V8 project. The original copyright header follows: + + Copyright 2006-2012, the V8 project authors. All rights reserved. + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +/** + * @author Christian <c@ethdev.com> + * @date 2014 + * Solidity scanner. + */ + +#pragma once + +#include <libdevcore/Common.h> +#include <libdevcore/Log.h> +#include <libdevcore/CommonData.h> +#include <libevmasm/SourceLocation.h> +#include <libsolidity/parsing/Token.h> + +namespace dev +{ +namespace solidity +{ + + +class AstRawString; +class AstValueFactory; +class ParserRecorder; + +class CharStream +{ +public: + CharStream(): m_position(0) {} + explicit CharStream(std::string const& _source): m_source(_source), m_position(0) {} + int position() const { return m_position; } + bool isPastEndOfInput(size_t _charsForward = 0) const { return (m_position + _charsForward) >= m_source.size(); } + char get(size_t _charsForward = 0) const { return m_source[m_position + _charsForward]; } + char advanceAndGet(size_t _chars=1); + char rollback(size_t _amount); + + void reset() { m_position = 0; } + + ///@{ + ///@name Error printing helper functions + /// Functions that help pretty-printing parse errors + /// Do only use in error cases, they are quite expensive. 
+ std::string lineAtPosition(int _position) const; + std::tuple<int, int> translatePositionToLineColumn(int _position) const; + ///@} + +private: + std::string m_source; + size_t m_position; +}; + + + +class Scanner +{ + friend class LiteralScope; +public: + + explicit Scanner(CharStream const& _source = CharStream(), std::string const& _sourceName = "") { reset(_source, _sourceName); } + + /// Resets the scanner as if newly constructed with _source and _sourceName as input. + void reset(CharStream const& _source, std::string const& _sourceName); + /// Resets scanner to the start of input. + void reset(); + + /// Returns the next token and advances input + Token::Value next(); + + ///@{ + ///@name Information about the current token + + /// Returns the current token + Token::Value currentToken() + { + return m_currentToken.token; + } + + SourceLocation currentLocation() const { return m_currentToken.location; } + std::string const& currentLiteral() const { return m_currentToken.literal; } + ///@} + + ///@{ + ///@name Information about the current comment token + + SourceLocation currentCommentLocation() const { return m_skippedComment.location; } + std::string const& currentCommentLiteral() const { return m_skippedComment.literal; } + /// Called by the parser during FunctionDefinition parsing to clear the current comment + void clearCurrentCommentLiteral() { m_skippedComment.literal.clear(); } + + ///@} + + ///@{ + ///@name Information about the next token + + /// Returns the next token without advancing input. 
+ Token::Value peekNextToken() const { return m_nextToken.token; } + SourceLocation peekLocation() const { return m_nextToken.location; } + std::string const& peekLiteral() const { return m_nextToken.literal; } + ///@} + + std::shared_ptr<std::string const> const& sourceName() const { return m_sourceName; } + + ///@{ + ///@name Error printing helper functions + /// Functions that help pretty-printing parse errors + /// Do only use in error cases, they are quite expensive. + std::string lineAtPosition(int _position) const { return m_source.lineAtPosition(_position); } + std::tuple<int, int> translatePositionToLineColumn(int _position) const { return m_source.translatePositionToLineColumn(_position); } + ///@} + +private: + /// Used for the current and look-ahead token and comments + struct TokenDesc + { + Token::Value token; + SourceLocation location; + std::string literal; + }; + + ///@{ + ///@name Literal buffer support + inline void addLiteralChar(char c) { m_nextToken.literal.push_back(c); } + inline void addCommentLiteralChar(char c) { m_nextSkippedComment.literal.push_back(c); } + inline void addLiteralCharAndAdvance() { addLiteralChar(m_char); advance(); } + ///@} + + bool advance() { m_char = m_source.advanceAndGet(); return !m_source.isPastEndOfInput(); } + void rollback(int _amount) { m_char = m_source.rollback(_amount); } + + inline Token::Value selectToken(Token::Value _tok) { advance(); return _tok; } + /// If the next character is _next, advance and return _then, otherwise return _else. + inline Token::Value selectToken(char _next, Token::Value _then, Token::Value _else); + + bool scanHexByte(char& o_scannedByte); + + /// Scans a single Solidity token. + void scanToken(); + + /// Skips all whitespace and @returns true if something was skipped. 
+ bool skipWhitespace(); + /// Skips all whitespace except Line feeds and returns true if something was skipped + bool skipWhitespaceExceptLF(); + Token::Value skipSingleLineComment(); + Token::Value skipMultiLineComment(); + + void scanDecimalDigits(); + Token::Value scanNumber(char _charSeen = 0); + Token::Value scanIdentifierOrKeyword(); + + Token::Value scanString(); + Token::Value scanSingleLineDocComment(); + Token::Value scanMultiLineDocComment(); + /// Scans a slash '/' and depending on the characters returns the appropriate token + Token::Value scanSlash(); + + /// Scans an escape-sequence which is part of a string and adds the + /// decoded character to the current literal. Returns true if a pattern + /// is scanned. + bool scanEscape(); + + /// Return the current source position. + int sourcePos() { return m_source.position(); } + bool isSourcePastEndOfInput() { return m_source.isPastEndOfInput(); } + + TokenDesc m_skippedComment; // desc for current skipped comment + TokenDesc m_nextSkippedComment; // desc for next skiped comment + + TokenDesc m_currentToken; // desc for current token (as returned by Next()) + TokenDesc m_nextToken; // desc for next token (one token look-ahead) + + CharStream m_source; + std::shared_ptr<std::string const> m_sourceName; + + /// one character look-ahead, equals 0 at end of input + char m_char; +}; + +} +} diff --git a/libsolidity/parsing/Token.cpp b/libsolidity/parsing/Token.cpp new file mode 100644 index 00000000..cda639fb --- /dev/null +++ b/libsolidity/parsing/Token.cpp @@ -0,0 +1,100 @@ +// Copyright 2006-2012, the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Modifications as part of cpp-ethereum under the following license: +// +// cpp-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// cpp-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. 
+ +#include <map> +#include <libsolidity/parsing/Token.h> + +using namespace std; + +namespace dev +{ +namespace solidity +{ + +#define T(name, string, precedence) #name, +char const* const Token::m_name[NUM_TOKENS] = +{ + TOKEN_LIST(T, T) +}; +#undef T + + +#define T(name, string, precedence) string, +char const* const Token::m_string[NUM_TOKENS] = +{ + TOKEN_LIST(T, T) +}; +#undef T + + +#define T(name, string, precedence) precedence, +int8_t const Token::m_precedence[NUM_TOKENS] = +{ + TOKEN_LIST(T, T) +}; +#undef T + + +#define KT(a, b, c) 'T', +#define KK(a, b, c) 'K', +char const Token::m_tokenType[] = +{ + TOKEN_LIST(KT, KK) +}; +Token::Value Token::fromIdentifierOrKeyword(const std::string& _name) +{ + // The following macros are used inside TOKEN_LIST and cause non-keyword tokens to be ignored + // and keywords to be put inside the keywords variable. +#define KEYWORD(name, string, precedence) {string, Token::name}, +#define TOKEN(name, string, precedence) + static const map<string, Token::Value> keywords({TOKEN_LIST(TOKEN, KEYWORD)}); +#undef KEYWORD +#undef TOKEN + auto it = keywords.find(_name); + return it == keywords.end() ? Token::Identifier : it->second; +} + +#undef KT +#undef KK + +} +} diff --git a/libsolidity/parsing/Token.h b/libsolidity/parsing/Token.h new file mode 100644 index 00000000..98461fa4 --- /dev/null +++ b/libsolidity/parsing/Token.h @@ -0,0 +1,407 @@ +// Copyright 2006-2012, the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. 
+// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Modifications as part of cpp-ethereum under the following license: +// +// cpp-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// cpp-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. 
+ +#pragma once + +#include <libdevcore/Common.h> +#include <libdevcore/Log.h> +#include <libsolidity/interface/Utils.h> +#include <libsolidity/interface/Exceptions.h> +#include <libdevcore/UndefMacros.h> + +namespace dev +{ +namespace solidity +{ + +// TOKEN_LIST takes a list of 3 macros M, all of which satisfy the +// same signature M(name, string, precedence), where name is the +// symbolic token name, string is the corresponding syntactic symbol +// (or NULL, for literals), and precedence is the precedence (or 0). +// The parameters are invoked for token categories as follows: +// +// T: Non-keyword tokens +// K: Keyword tokens + +// IGNORE_TOKEN is a convenience macro that can be supplied as +// an argument (at any position) for a TOKEN_LIST call. It does +// nothing with tokens belonging to the respective category. + +#define IGNORE_TOKEN(name, string, precedence) + +#define TOKEN_LIST(T, K) \ + /* End of source indicator. */ \ + T(EOS, "EOS", 0) \ + \ + /* Punctuators (ECMA-262, section 7.7, page 15). */ \ + T(LParen, "(", 0) \ + T(RParen, ")", 0) \ + T(LBrack, "[", 0) \ + T(RBrack, "]", 0) \ + T(LBrace, "{", 0) \ + T(RBrace, "}", 0) \ + T(Colon, ":", 0) \ + T(Semicolon, ";", 0) \ + T(Period, ".", 0) \ + T(Conditional, "?", 3) \ + T(Arrow, "=>", 0) \ + \ + /* Assignment operators. */ \ + /* IsAssignmentOp() relies on this block of enum values being */ \ + /* contiguous and sorted in the same order!*/ \ + T(Assign, "=", 2) \ + /* The following have to be in exactly the same order as the simple binary operators*/ \ + T(AssignBitOr, "|=", 2) \ + T(AssignBitXor, "^=", 2) \ + T(AssignBitAnd, "&=", 2) \ + T(AssignShl, "<<=", 2) \ + T(AssignSar, ">>=", 2) \ + T(AssignShr, ">>>=", 2) \ + T(AssignAdd, "+=", 2) \ + T(AssignSub, "-=", 2) \ + T(AssignMul, "*=", 2) \ + T(AssignDiv, "/=", 2) \ + T(AssignMod, "%=", 2) \ + \ + /* Binary operators sorted by precedence. 
*/ \ + /* IsBinaryOp() relies on this block of enum values */ \ + /* being contiguous and sorted in the same order! */ \ + T(Comma, ",", 1) \ + T(Or, "||", 4) \ + T(And, "&&", 5) \ + T(BitOr, "|", 8) \ + T(BitXor, "^", 9) \ + T(BitAnd, "&", 10) \ + T(SHL, "<<", 11) \ + T(SAR, ">>", 11) \ + T(SHR, ">>>", 11) \ + T(Add, "+", 12) \ + T(Sub, "-", 12) \ + T(Mul, "*", 13) \ + T(Div, "/", 13) \ + T(Mod, "%", 13) \ + T(Exp, "**", 14) \ + \ + /* Compare operators sorted by precedence. */ \ + /* IsCompareOp() relies on this block of enum values */ \ + /* being contiguous and sorted in the same order! */ \ + T(Equal, "==", 6) \ + T(NotEqual, "!=", 6) \ + T(LessThan, "<", 7) \ + T(GreaterThan, ">", 7) \ + T(LessThanOrEqual, "<=", 7) \ + T(GreaterThanOrEqual, ">=", 7) \ + K(In, "in", 7) \ + \ + /* Unary operators. */ \ + /* IsUnaryOp() relies on this block of enum values */ \ + /* being contiguous and sorted in the same order! */ \ + T(Not, "!", 0) \ + T(BitNot, "~", 0) \ + T(Inc, "++", 0) \ + T(Dec, "--", 0) \ + K(Delete, "delete", 0) \ + \ + /* Keywords */ \ + K(Anonymous, "anonymous", 0) \ + K(Break, "break", 0) \ + K(Const, "constant", 0) \ + K(Continue, "continue", 0) \ + K(Contract, "contract", 0) \ + K(Default, "default", 0) \ + K(Do, "do", 0) \ + K(Else, "else", 0) \ + K(Enum, "enum", 0) \ + K(Event, "event", 0) \ + K(External, "external", 0) \ + K(For, "for", 0) \ + K(Function, "function", 0) \ + K(If, "if", 0) \ + K(Indexed, "indexed", 0) \ + K(Internal, "internal", 0) \ + K(Import, "import", 0) \ + K(Is, "is", 0) \ + K(Library, "library", 0) \ + K(Mapping, "mapping", 0) \ + K(Memory, "memory", 0) \ + K(Modifier, "modifier", 0) \ + K(New, "new", 0) \ + K(Public, "public", 0) \ + K(Private, "private", 0) \ + K(Return, "return", 0) \ + K(Returns, "returns", 0) \ + K(Storage, "storage", 0) \ + K(Struct, "struct", 0) \ + K(Throw, "throw", 0) \ + K(Var, "var", 0) \ + K(While, "while", 0) \ + \ + /* Ether subdenominations */ \ + K(SubWei, "wei", 0) \ + K(SubSzabo, "szabo", 
0) \ + K(SubFinney, "finney", 0) \ + K(SubEther, "ether", 0) \ + K(SubSecond, "seconds", 0) \ + K(SubMinute, "minutes", 0) \ + K(SubHour, "hours", 0) \ + K(SubDay, "days", 0) \ + K(SubWeek, "weeks", 0) \ + K(SubYear, "years", 0) \ + K(After, "after", 0) \ + /* type keywords, keep them in this order, keep int as first keyword + * the implementation in Types.cpp has to be synced to this here */\ + K(Int, "int", 0) \ + K(Int8, "int8", 0) \ + K(Int16, "int16", 0) \ + K(Int24, "int24", 0) \ + K(Int32, "int32", 0) \ + K(Int40, "int40", 0) \ + K(Int48, "int48", 0) \ + K(Int56, "int56", 0) \ + K(Int64, "int64", 0) \ + K(Int72, "int72", 0) \ + K(Int80, "int80", 0) \ + K(Int88, "int88", 0) \ + K(Int96, "int96", 0) \ + K(Int104, "int104", 0) \ + K(Int112, "int112", 0) \ + K(Int120, "int120", 0) \ + K(Int128, "int128", 0) \ + K(Int136, "int136", 0) \ + K(Int144, "int144", 0) \ + K(Int152, "int152", 0) \ + K(Int160, "int160", 0) \ + K(Int168, "int168", 0) \ + K(Int176, "int178", 0) \ + K(Int184, "int184", 0) \ + K(Int192, "int192", 0) \ + K(Int200, "int200", 0) \ + K(Int208, "int208", 0) \ + K(Int216, "int216", 0) \ + K(Int224, "int224", 0) \ + K(Int232, "int232", 0) \ + K(Int240, "int240", 0) \ + K(Int248, "int248", 0) \ + K(Int256, "int256", 0) \ + K(UInt, "uint", 0) \ + K(UInt8, "uint8", 0) \ + K(UInt16, "uint16", 0) \ + K(UInt24, "uint24", 0) \ + K(UInt32, "uint32", 0) \ + K(UInt40, "uint40", 0) \ + K(UInt48, "uint48", 0) \ + K(UInt56, "uint56", 0) \ + K(UInt64, "uint64", 0) \ + K(UInt72, "uint72", 0) \ + K(UInt80, "uint80", 0) \ + K(UInt88, "uint88", 0) \ + K(UInt96, "uint96", 0) \ + K(UInt104, "uint104", 0) \ + K(UInt112, "uint112", 0) \ + K(UInt120, "uint120", 0) \ + K(UInt128, "uint128", 0) \ + K(UInt136, "uint136", 0) \ + K(UInt144, "uint144", 0) \ + K(UInt152, "uint152", 0) \ + K(UInt160, "uint160", 0) \ + K(UInt168, "uint168", 0) \ + K(UInt176, "uint178", 0) \ + K(UInt184, "uint184", 0) \ + K(UInt192, "uint192", 0) \ + K(UInt200, "uint200", 0) \ + K(UInt208, 
"uint208", 0) \ + K(UInt216, "uint216", 0) \ + K(UInt224, "uint224", 0) \ + K(UInt232, "uint232", 0) \ + K(UInt240, "uint240", 0) \ + K(UInt248, "uint248", 0) \ + K(UInt256, "uint256", 0) \ + K(Bytes1, "bytes1", 0) \ + K(Bytes2, "bytes2", 0) \ + K(Bytes3, "bytes3", 0) \ + K(Bytes4, "bytes4", 0) \ + K(Bytes5, "bytes5", 0) \ + K(Bytes6, "bytes6", 0) \ + K(Bytes7, "bytes7", 0) \ + K(Bytes8, "bytes8", 0) \ + K(Bytes9, "bytes9", 0) \ + K(Bytes10, "bytes10", 0) \ + K(Bytes11, "bytes11", 0) \ + K(Bytes12, "bytes12", 0) \ + K(Bytes13, "bytes13", 0) \ + K(Bytes14, "bytes14", 0) \ + K(Bytes15, "bytes15", 0) \ + K(Bytes16, "bytes16", 0) \ + K(Bytes17, "bytes17", 0) \ + K(Bytes18, "bytes18", 0) \ + K(Bytes19, "bytes19", 0) \ + K(Bytes20, "bytes20", 0) \ + K(Bytes21, "bytes21", 0) \ + K(Bytes22, "bytes22", 0) \ + K(Bytes23, "bytes23", 0) \ + K(Bytes24, "bytes24", 0) \ + K(Bytes25, "bytes25", 0) \ + K(Bytes26, "bytes26", 0) \ + K(Bytes27, "bytes27", 0) \ + K(Bytes28, "bytes28", 0) \ + K(Bytes29, "bytes29", 0) \ + K(Bytes30, "bytes30", 0) \ + K(Bytes31, "bytes31", 0) \ + K(Bytes32, "bytes32", 0) \ + K(Bytes, "bytes", 0) \ + K(Byte, "byte", 0) \ + K(String, "string", 0) \ + K(Address, "address", 0) \ + K(Bool, "bool", 0) \ + K(Real, "real", 0) \ + K(UReal, "ureal", 0) \ + T(TypesEnd, NULL, 0) /* used as type enum end marker */ \ + \ + /* Literals */ \ + K(NullLiteral, "null", 0) \ + K(TrueLiteral, "true", 0) \ + K(FalseLiteral, "false", 0) \ + T(Number, NULL, 0) \ + T(StringLiteral, NULL, 0) \ + T(CommentLiteral, NULL, 0) \ + \ + /* Identifiers (not keywords or future reserved words). */ \ + T(Identifier, NULL, 0) \ + \ + /* Keywords reserved for future use. 
*/ \ + K(As, "as", 0) \ + K(Case, "case", 0) \ + K(Catch, "catch", 0) \ + K(Final, "final", 0) \ + K(Let, "let", 0) \ + K(Match, "match", 0) \ + K(Of, "of", 0) \ + K(Relocatable, "relocatable", 0) \ + K(Switch, "switch", 0) \ + K(Try, "try", 0) \ + K(Type, "type", 0) \ + K(TypeOf, "typeof", 0) \ + K(Using, "using", 0) \ + /* Illegal token - not able to scan. */ \ + T(Illegal, "ILLEGAL", 0) \ + \ + /* Scanner-internal use only. */ \ + T(Whitespace, NULL, 0) + + +class Token +{ +public: + // All token values. + // attention! msvc issue: + // http://stackoverflow.com/questions/9567868/compile-errors-after-adding-v8-to-my-project-c2143-c2059 + // @todo: avoid TOKEN_LIST macro +#define T(name, string, precedence) name, + enum Value + { + TOKEN_LIST(T, T) + NUM_TOKENS + }; +#undef T + + // Returns a string corresponding to the C++ token name + // (e.g. "LT" for the token LT). + static char const* name(Value tok) + { + solAssert(tok < NUM_TOKENS, ""); + return m_name[tok]; + } + + // Predicates + static bool isElementaryTypeName(Value tok) { return Int <= tok && tok < TypesEnd; } + static bool isAssignmentOp(Value tok) { return Assign <= tok && tok <= AssignMod; } + static bool isBinaryOp(Value op) { return Comma <= op && op <= Exp; } + static bool isCommutativeOp(Value op) { return op == BitOr || op == BitXor || op == BitAnd || + op == Add || op == Mul || op == Equal || op == NotEqual; } + static bool isArithmeticOp(Value op) { return Add <= op && op <= Exp; } + static bool isCompareOp(Value op) { return Equal <= op && op <= In; } + + static Value AssignmentToBinaryOp(Value op) + { + solAssert(isAssignmentOp(op) && op != Assign, ""); + return Value(op + (BitOr - AssignBitOr)); + } + + static bool isBitOp(Value op) { return (BitOr <= op && op <= SHR) || op == BitNot; } + static bool isBooleanOp(Value op) { return (Or <= op && op <= And) || op == Not; } + static bool isUnaryOp(Value op) { return (Not <= op && op <= Delete) || op == Add || op == Sub || op == After; } + 
static bool isCountOp(Value op) { return op == Inc || op == Dec; } + static bool isShiftOp(Value op) { return (SHL <= op) && (op <= SHR); } + static bool isVisibilitySpecifier(Value op) { return isVariableVisibilitySpecifier(op) || op == External; } + static bool isVariableVisibilitySpecifier(Value op) { return op == Public || op == Private || op == Internal; } + static bool isLocationSpecifier(Value op) { return op == Memory || op == Storage; } + static bool isEtherSubdenomination(Value op) { return op == SubWei || op == SubSzabo || op == SubFinney || op == SubEther; } + static bool isTimeSubdenomination(Value op) { return op == SubSecond || op == SubMinute || op == SubHour || op == SubDay || op == SubWeek || op == SubYear; } + + // Returns a string corresponding to the JS token string + // (.e., "<" for the token LT) or NULL if the token doesn't + // have a (unique) string (e.g. an IDENTIFIER). + static char const* toString(Value tok) + { + solAssert(tok < NUM_TOKENS, ""); + return m_string[tok]; + } + + // Returns the precedence > 0 for binary and compare + // operators; returns 0 otherwise. + static int precedence(Value tok) + { + solAssert(tok < NUM_TOKENS, ""); + return m_precedence[tok]; + } + + static Token::Value fromIdentifierOrKeyword(std::string const& _name); + +private: + static char const* const m_name[NUM_TOKENS]; + static char const* const m_string[NUM_TOKENS]; + static int8_t const m_precedence[NUM_TOKENS]; + static char const m_tokenType[NUM_TOKENS]; +}; + +} +} |