diff options
author | chriseth <chris@ethereum.org> | 2018-11-26 21:17:08 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-11-26 21:17:08 +0800 |
commit | 9ac7c748f8b954e712ba78f1431a1b5151a4ad86 (patch) | |
tree | 91dd905c5fefab466ff496edab884e0d8d8e608c | |
parent | 0b474d52994028392402ca1e7f56b613ab365dec (diff) | |
parent | 0ad56bca790fa88d3baa7f46dc47c104baedd824 (diff) | |
download | dexon-solidity-9ac7c748f8b954e712ba78f1431a1b5151a4ad86.tar.gz dexon-solidity-9ac7c748f8b954e712ba78f1431a1b5151a4ad86.tar.zst dexon-solidity-9ac7c748f8b954e712ba78f1431a1b5151a4ad86.zip |
Merge pull request #5494 from ethereum/scanner-error-handling
Improved Scanner error diagnostics.
12 files changed, 98 insertions, 32 deletions
diff --git a/liblangutil/Scanner.cpp b/liblangutil/Scanner.cpp index 3d7527d4..215171b3 100644 --- a/liblangutil/Scanner.cpp +++ b/liblangutil/Scanner.cpp @@ -53,6 +53,7 @@ #include <liblangutil/Exceptions.h> #include <liblangutil/Scanner.h> #include <algorithm> +#include <ostream> #include <tuple> using namespace std; @@ -100,7 +101,32 @@ int hexValue(char c) } } // end anonymous namespace +std::string to_string(ScannerError _errorCode) +{ + switch (_errorCode) + { + case ScannerError::NoError: return "No error."; + case ScannerError::IllegalToken: return "Invalid token."; + case ScannerError::IllegalHexString: return "Expected even number of hex-nibbles within double-quotes."; + case ScannerError::IllegalHexDigit: return "Hexadecimal digit missing or invalid."; + case ScannerError::IllegalCommentTerminator: return "Expected multi-line comment-terminator."; + case ScannerError::IllegalEscapeSequence: return "Invalid escape sequence."; + case ScannerError::IllegalStringEndQuote: return "Expected string end-quote."; + case ScannerError::IllegalNumberSeparator: return "Invalid use of number separator '_'."; + case ScannerError::IllegalExponent: return "Invalid exponent."; + case ScannerError::IllegalNumberEnd: return "Identifier-start is not allowed at end of a number."; + case ScannerError::OctalNotAllowed: return "Octal numbers not allowed."; + default: + solAssert(false, "Unhandled case in to_string(ScannerError)"); + return ""; + } +} +std::ostream& operator<<(std::ostream& os, ScannerError _errorCode) +{ + os << to_string(_errorCode); + return os; +} /// Scoped helper for literal recording. Automatically drops the literal /// if aborting the scanning before it's complete. @@ -311,7 +337,7 @@ Token Scanner::skipMultiLineComment() } } // Unterminated multi-line comment. - return Token::Illegal; + return setError(ScannerError::IllegalCommentTerminator); } Token Scanner::scanMultiLineDocComment() @@ -362,7 +388,7 @@ Token Scanner::scanMultiLineDocComment() } literal.complete(); if (!endFound) - return Token::Illegal; + return setError(ScannerError::IllegalCommentTerminator); else return Token::CommentLiteral; } @@ -392,7 +418,7 @@ Token Scanner::scanSlash() { // doxygen style /** natspec comment if (!advance()) /* slash star comment before EOS */ - return Token::Illegal; + return setError(ScannerError::IllegalCommentTerminator); else if (m_char == '*') { advance(); //consume the last '*' at /** @@ -410,7 +436,7 @@ Token Scanner::scanSlash() m_nextSkippedComment.location.end = sourcePos(); m_nextSkippedComment.token = comment; if (comment == Token::Illegal) - return Token::Illegal; + return Token::Illegal; // error already set else return Token::Whitespace; } @@ -425,6 +451,7 @@ Token Scanner::scanSlash() void Scanner::scanToken() { + m_nextToken.error = ScannerError::NoError; m_nextToken.literal.clear(); m_nextToken.extendedTokenInfo = make_tuple(0, 0); m_nextSkippedComment.literal.clear(); @@ -610,7 +637,7 @@ void Scanner::scanToken() if (m_char == '"' || m_char == '\'') token = scanHexString(); else - token = Token::IllegalHex; + token = setError(ScannerError::IllegalToken); } } else if (isDecimalDigit(m_char)) @@ -620,7 +647,7 @@ void Scanner::scanToken() else if (isSourcePastEndOfInput()) token = Token::EOS; else - token = selectToken(Token::Illegal); + token = selectErrorToken(ScannerError::IllegalToken); break; } // Continue scanning for tokens as long as we're just skipping @@ -713,13 +740,13 @@ Token Scanner::scanString() if (c == '\\') { if (isSourcePastEndOfInput() || !scanEscape()) - return Token::Illegal; + return setError(ScannerError::IllegalEscapeSequence); } else addLiteralChar(c); } if (m_char != quote) - return Token::Illegal; + return setError(ScannerError::IllegalStringEndQuote); literal.complete(); advance(); // consume quote return Token::StringLiteral; @@ -734,11 +761,14 @@ Token Scanner::scanHexString() { char c = m_char; if (!scanHexByte(c)) - return Token::IllegalHex; + // can only return false if hex-byte is incomplete (only one hex digit instead of two) + return setError(ScannerError::IllegalHexString); addLiteralChar(c); } + if (m_char != quote) - return Token::IllegalHex; + return setError(ScannerError::IllegalStringEndQuote); + literal.complete(); advance(); // consume quote return Token::StringLiteral; @@ -767,7 +797,7 @@ Token Scanner::scanNumber(char _charSeen) // we have already seen a decimal point of the float addLiteralChar('.'); if (m_char == '_') - return Token::Illegal; + return setError(ScannerError::IllegalToken); scanDecimalDigits(); // we know we have at least one digit } else @@ -784,14 +814,14 @@ Token Scanner::scanNumber(char _charSeen) kind = HEX; addLiteralCharAndAdvance(); if (!isHexDigit(m_char)) - return Token::Illegal; // we must have at least one hex digit after 'x' + return setError(ScannerError::IllegalHexDigit); // we must have at least one hex digit after 'x' while (isHexDigit(m_char) || m_char == '_') // We keep the underscores for later validation addLiteralCharAndAdvance(); } else if (isDecimalDigit(m_char)) // We do not allow octal numbers - return Token::Illegal; + return setError(ScannerError::OctalNotAllowed); } // Parse decimal digits and allow trailing fractional part. if (kind == DECIMAL) @@ -823,7 +853,7 @@ Token Scanner::scanNumber(char _charSeen) { solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number"); if (kind != DECIMAL) - return Token::Illegal; + return setError(ScannerError::IllegalExponent); else if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_') { // Recover from wrongly placed underscore as delimiter in literal with scientific @@ -838,8 +868,8 @@ Token Scanner::scanNumber(char _charSeen) addLiteralCharAndAdvance(); // 'e' | 'E' if (m_char == '+' || m_char == '-') addLiteralCharAndAdvance(); - if (!isDecimalDigit(m_char)) - return Token::Illegal; // we must have at least one decimal digit after 'e'/'E' + if (!isDecimalDigit(m_char)) // we must have at least one decimal digit after 'e'/'E' + return setError(ScannerError::IllegalExponent); scanDecimalDigits(); } // The source character immediately following a numeric literal must @@ -847,7 +877,7 @@ Token Scanner::scanNumber(char _charSeen) // section 7.8.3, page 17 (note that we read only one decimal digit // if the value is 0). if (isDecimalDigit(m_char) || isIdentifierStart(m_char)) - return Token::Illegal; + return setError(ScannerError::IllegalNumberEnd); literal.complete(); return Token::Number; } diff --git a/liblangutil/Scanner.h b/liblangutil/Scanner.h index da5e3dfb..d01e71e2 100644 --- a/liblangutil/Scanner.h +++ b/liblangutil/Scanner.h @@ -57,6 +57,7 @@ #include <liblangutil/SourceLocation.h> #include <libdevcore/Common.h> #include <libdevcore/CommonData.h> +#include <iosfwd> namespace langutil { @@ -65,6 +66,26 @@ class AstRawString; class AstValueFactory; class ParserRecorder; +enum class ScannerError +{ + NoError, + + IllegalToken, + IllegalHexString, + IllegalHexDigit, + IllegalCommentTerminator, + IllegalEscapeSequence, + IllegalStringEndQuote, + IllegalNumberSeparator, + IllegalExponent, + IllegalNumberEnd, + + OctalNotAllowed, +}; + +std::string to_string(ScannerError _errorCode); +std::ostream& operator<<(std::ostream& os, ScannerError _errorCode); + class Scanner { friend class LiteralScope; @@ -100,6 +121,10 @@ public: SourceLocation currentLocation() const { return m_currentToken.location; } std::string const& currentLiteral() const { return m_currentToken.literal; } std::tuple<unsigned, unsigned> const& currentTokenInfo() const { return m_currentToken.extendedTokenInfo; } + + /// Retrieves the last error that occurred during lexical analysis. + /// @note If no error occurred, the value is undefined. + ScannerError currentError() const noexcept { return m_currentToken.error; } ///@} ///@{ @@ -139,12 +164,19 @@ public: ///@} private: + inline Token setError(ScannerError _error) noexcept + { + m_nextToken.error = _error; + return Token::Illegal; + } + /// Used for the current and look-ahead token and comments struct TokenDesc { Token token; SourceLocation location; std::string literal; + ScannerError error = ScannerError::NoError; std::tuple<unsigned, unsigned> extendedTokenInfo; }; @@ -159,6 +191,7 @@ private: bool advance() { m_char = m_source.advanceAndGet(); return !m_source.isPastEndOfInput(); } void rollback(int _amount) { m_char = m_source.rollback(_amount); } + inline Token selectErrorToken(ScannerError _err) { advance(); return setError(_err); } inline Token selectToken(Token _tok) { advance(); return _tok; } /// If the next character is _next, advance and return _then, otherwise return _else. inline Token selectToken(char _next, Token _then, Token _else); diff --git a/liblangutil/Token.h b/liblangutil/Token.h index d997b138..f832fdf7 100644 --- a/liblangutil/Token.h +++ b/liblangutil/Token.h @@ -263,8 +263,6 @@ namespace langutil \ /* Illegal token - not able to scan. */ \ T(Illegal, "ILLEGAL", 0) \ - /* Illegal hex token */ \ - T(IllegalHex, "ILLEGAL_HEX", 0) \ \ /* Scanner-internal use only. */ \ T(Whitespace, nullptr, 0) diff --git a/libsolidity/parsing/Parser.cpp b/libsolidity/parsing/Parser.cpp index ffe2be83..3f4a015b 100644 --- a/libsolidity/parsing/Parser.cpp +++ b/libsolidity/parsing/Parser.cpp @@ -1555,8 +1555,8 @@ ASTPointer<Expression> Parser::parsePrimaryExpression() expression = nodeFactory.createNode<TupleExpression>(components, isArray); break; } - case Token::IllegalHex: - fatalParserError("Expected even number of hex-nibbles within double-quotes."); + case Token::Illegal: + fatalParserError(to_string(m_scanner->currentError())); break; default: if (TokenTraits::isElementaryTypeName(token)) diff --git a/test/libsolidity/SolidityScanner.cpp b/test/libsolidity/SolidityScanner.cpp index 2d164ae3..02d91d32 100644 --- a/test/libsolidity/SolidityScanner.cpp +++ b/test/libsolidity/SolidityScanner.cpp @@ -88,6 +88,7 @@ BOOST_AUTO_TEST_CASE(string_escape_illegal) Scanner scanner(CharStream(" bla \"\\x6rf\" (illegalescape)")); BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier); BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); + BOOST_CHECK_EQUAL(scanner.currentError(), ScannerError::IllegalEscapeSequence); BOOST_CHECK_EQUAL(scanner.currentLiteral(), ""); // TODO recovery from illegal tokens should be improved BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); @@ -486,28 +487,32 @@ BOOST_AUTO_TEST_CASE(invalid_short_hex_literal) { Scanner scanner(CharStream("{ hex\"00112233F\"")); BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); - BOOST_CHECK_EQUAL(scanner.next(), Token::IllegalHex); + BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); + BOOST_CHECK_EQUAL(scanner.currentError(), ScannerError::IllegalHexString); } BOOST_AUTO_TEST_CASE(invalid_hex_literal_with_space) { Scanner scanner(CharStream("{ hex\"00112233FF \"")); BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); - BOOST_CHECK_EQUAL(scanner.next(), Token::IllegalHex); + BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); + BOOST_CHECK_EQUAL(scanner.currentError(), ScannerError::IllegalHexString); } BOOST_AUTO_TEST_CASE(invalid_hex_literal_with_wrong_quotes) { Scanner scanner(CharStream("{ hex\"00112233FF'")); BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); - BOOST_CHECK_EQUAL(scanner.next(), Token::IllegalHex); + BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); + BOOST_CHECK_EQUAL(scanner.currentError(), ScannerError::IllegalHexString); } BOOST_AUTO_TEST_CASE(invalid_hex_literal_nonhex_string) { Scanner scanner(CharStream("{ hex\"hello\"")); BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); - BOOST_CHECK_EQUAL(scanner.next(), Token::IllegalHex); + BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); + BOOST_CHECK_EQUAL(scanner.currentError(), ScannerError::IllegalHexString); } // COMMENTS diff --git a/test/libsolidity/syntaxTests/inlineAssembly/invalid/invalid_number.sol b/test/libsolidity/syntaxTests/inlineAssembly/invalid/invalid_number.sol index 715913de..b44d09e3 100644 --- a/test/libsolidity/syntaxTests/inlineAssembly/invalid/invalid_number.sol +++ b/test/libsolidity/syntaxTests/inlineAssembly/invalid/invalid_number.sol @@ -7,4 +7,4 @@ contract C { } // ---- // ParserError: (72-73): Literal, identifier or instruction expected. -// ParserError: (72-73): Expected primary expression. +// ParserError: (72-73): Octal numbers not allowed. diff --git a/test/libsolidity/syntaxTests/parsing/invalid_fixed_conversion_leading_zeroes_check.sol b/test/libsolidity/syntaxTests/parsing/invalid_fixed_conversion_leading_zeroes_check.sol index fb267ba3..352b5f8f 100644 --- a/test/libsolidity/syntaxTests/parsing/invalid_fixed_conversion_leading_zeroes_check.sol +++ b/test/libsolidity/syntaxTests/parsing/invalid_fixed_conversion_leading_zeroes_check.sol @@ -4,4 +4,4 @@ contract test { } } // ---- -// ParserError: (44-47): Expected primary expression. +// ParserError: (44-47): Identifier-start is not allowed at end of a number. diff --git a/test/libsolidity/syntaxTests/string/string_new_line.sol b/test/libsolidity/syntaxTests/string/string_new_line.sol index da2240f7..4cbc71a5 100644 --- a/test/libsolidity/syntaxTests/string/string_new_line.sol +++ b/test/libsolidity/syntaxTests/string/string_new_line.sol @@ -6,4 +6,4 @@ contract test { } } // ---- -// ParserError: (100-112): Expected primary expression. +// ParserError: (100-112): Expected string end-quote. diff --git a/test/libsolidity/syntaxTests/string/string_terminated_by_backslash.sol b/test/libsolidity/syntaxTests/string/string_terminated_by_backslash.sol index 3eaba6af..fba53a03 100644 --- a/test/libsolidity/syntaxTests/string/string_terminated_by_backslash.sol +++ b/test/libsolidity/syntaxTests/string/string_terminated_by_backslash.sol @@ -5,4 +5,4 @@ contract test { } } // ---- -// ParserError: (100-109): Expected primary expression.
\ No newline at end of file +// ParserError: (100-109): Expected string end-quote. diff --git a/test/libsolidity/syntaxTests/string/string_unterminated.sol b/test/libsolidity/syntaxTests/string/string_unterminated.sol index 3291781e..e893f4b1 100644 --- a/test/libsolidity/syntaxTests/string/string_unterminated.sol +++ b/test/libsolidity/syntaxTests/string/string_unterminated.sol @@ -4,4 +4,4 @@ contract test { } } // ---- -// ParserError: (100-112): Expected primary expression.
\ No newline at end of file +// ParserError: (100-112): Expected string end-quote. diff --git a/test/libsolidity/syntaxTests/string/string_unterminated_no_new_line.sol b/test/libsolidity/syntaxTests/string/string_unterminated_no_new_line.sol index e7be50d2..b100396b 100644 --- a/test/libsolidity/syntaxTests/string/string_unterminated_no_new_line.sol +++ b/test/libsolidity/syntaxTests/string/string_unterminated_no_new_line.sol @@ -1,4 +1,4 @@ contract test { function f() pure public { "abc\ // ---- -// ParserError: (47-53): Expected primary expression.
\ No newline at end of file +// ParserError: (47-53): Expected string end-quote. diff --git a/test/libsolidity/syntaxTests/unicode_escape_literals.sol b/test/libsolidity/syntaxTests/unicode_escape_literals.sol index a340487b..4415d493 100644 --- a/test/libsolidity/syntaxTests/unicode_escape_literals.sol +++ b/test/libsolidity/syntaxTests/unicode_escape_literals.sol @@ -28,4 +28,4 @@ contract test { } // ---- -// ParserError: (678-681): Expected primary expression. +// ParserError: (678-681): Invalid escape sequence. |