diff options
author | Christian Parpart <christian@parpart.family> | 2018-08-03 22:13:52 +0800 |
---|---|---|
committer | Christian Parpart <christian@ethereum.org> | 2018-08-08 19:38:46 +0800 |
commit | b9222808f61e00833f8c11cd196cafb50ec9e1b9 (patch) | |
tree | 93f7552a8effdb5e3ab3a240f4337edd3f8f249c /libsolidity/parsing/Scanner.cpp | |
parent | 09a36cba0223c16248335703412cee019c7aa59f (diff) | |
download | dexon-solidity-b9222808f61e00833f8c11cd196cafb50ec9e1b9.tar.gz dexon-solidity-b9222808f61e00833f8c11cd196cafb50ec9e1b9.tar.zst dexon-solidity-b9222808f61e00833f8c11cd196cafb50ec9e1b9.zip |
Cleanup & polish numbers-with-underscores parsing, also improving tests.
Diffstat (limited to 'libsolidity/parsing/Scanner.cpp')
-rw-r--r-- | libsolidity/parsing/Scanner.cpp | 60 |
1 file changed, 31 insertions, 29 deletions
```diff
diff --git a/libsolidity/parsing/Scanner.cpp b/libsolidity/parsing/Scanner.cpp
index c223779e..30fdf21d 100644
--- a/libsolidity/parsing/Scanner.cpp
+++ b/libsolidity/parsing/Scanner.cpp
@@ -724,28 +724,18 @@ Token::Value Scanner::scanHexString()
 	return Token::StringLiteral;
 }
 
+// Parse for regex [:digit:]+(_[:digit:]+)*
 void Scanner::scanDecimalDigits()
 {
-	// Parse for regex [:digit:]+(_[:digit:]+)*
+	// MUST begin with a decimal digit.
+	if (!isDecimalDigit(m_char))
+		return;
 
-	do
-	{
-		if (!isDecimalDigit(m_char))
-			return;
-		while (isDecimalDigit(m_char))
-			addLiteralCharAndAdvance();
+	// May continue with decimal digit or underscore for grouping.
+	do addLiteralCharAndAdvance();
+	while (!m_source.isPastEndOfInput() && (isDecimalDigit(m_char) || m_char == '_'));
 
-		if (m_char == '_')
-		{
-			advance();
-			if (!isDecimalDigit(m_char)) // Trailing underscore. Rollback and allow next step to flag it as illegal
-			{
-				rollback(1);
-				return;
-			}
-		}
-	}
-	while (isDecimalDigit(m_char));
+	// Defer further validation of underscore to SyntaxChecker.
 }
 
 Token::Value Scanner::scanNumber(char _charSeen)
@@ -756,6 +746,8 @@ Token::Value Scanner::scanNumber(char _charSeen)
 	{
 		// we have already seen a decimal point of the float
 		addLiteralChar('.');
+		if (m_char == '_')
+			return Token::Illegal;
 		scanDecimalDigits(); // we know we have at least one digit
 	}
 	else
@@ -773,17 +765,9 @@ Token::Value Scanner::scanNumber(char _charSeen)
 				addLiteralCharAndAdvance();
 				if (!isHexDigit(m_char))
 					return Token::Illegal; // we must have at least one hex digit after 'x'/'X'
-				char last = m_char;
-				while (isHexDigit(m_char) || m_char == '_') // Unlike decimal digits, we keep the underscores for later validation
-				{
-					if (m_char == '_' && last == '_')
-						return Token::Illegal; // Double underscore
 
-					last = m_char;
+				while (isHexDigit(m_char) || m_char == '_') // We keep the underscores for later validation
 					addLiteralCharAndAdvance();
-				}
-				if (last == '_')
-					return Token::Illegal; // Trailing underscore
 			}
 			else if (isDecimalDigit(m_char))
 				// We do not allow octal numbers
@@ -795,9 +779,17 @@
 			scanDecimalDigits(); // optional
 			if (m_char == '.')
 			{
-				// A '.' has to be followed by a number.
+				if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
+				{
+					// Assume the input may be a floating point number with leading '_' in fraction part.
+					// Recover by consuming it all but returning `Illegal` right away.
+					addLiteralCharAndAdvance(); // '.'
+					addLiteralCharAndAdvance(); // '_'
+					scanDecimalDigits();
+				}
 				if (m_source.isPastEndOfInput() || !isDecimalDigit(m_source.get(1)))
 				{
+					// A '.' has to be followed by a number.
 					literal.complete();
 					return Token::Number;
 				}
@@ -812,8 +804,18 @@ Token::Value Scanner::scanNumber(char _charSeen)
 		solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number");
 		if (kind != DECIMAL)
 			return Token::Illegal;
+		else if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
+		{
+			// Recover from wrongly placed underscore as delimiter in literal with scientific
+			// notation by consuming until the end.
+			addLiteralCharAndAdvance(); // 'e'
+			addLiteralCharAndAdvance(); // '_'
+			scanDecimalDigits();
+			literal.complete();
+			return Token::Number;
+		}
 		// scan exponent
-		addLiteralCharAndAdvance();
+		addLiteralCharAndAdvance(); // 'e' | 'E'
 		if (m_char == '+' || m_char == '-')
 			addLiteralCharAndAdvance();
 		if (!isDecimalDigit(m_char))
```