diff options
author | chriseth <chris@ethereum.org> | 2018-09-10 19:00:03 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-09-10 19:00:03 +0800 |
commit | b9164eaba2a192428a27bac045c529c438599af7 (patch) | |
tree | bdc128a766c64667537ddfe8fbe9bea55025aaea | |
parent | a1848ac9470a97bf2f158b91a558391ee9f70c11 (diff) | |
parent | 55e4532c7231ea7f4ab54402ebac84406564b64b (diff) | |
download | dexon-solidity-b9164eaba2a192428a27bac045c529c438599af7.tar.gz dexon-solidity-b9164eaba2a192428a27bac045c529c438599af7.tar.zst dexon-solidity-b9164eaba2a192428a27bac045c529c438599af7.zip |
Merge pull request #4937 from ethereum/fixNewline_0425
[backport] Fix newline bugs
-rw-r--r-- | Changelog.md | 3 | ||||
-rw-r--r-- | libsolidity/parsing/Scanner.cpp | 84 | ||||
-rw-r--r-- | libsolidity/parsing/Scanner.h | 7 | ||||
-rw-r--r-- | test/libsolidity/SolidityScanner.cpp | 106 |
4 files changed, 167 insertions, 33 deletions
diff --git a/Changelog.md b/Changelog.md index 6397b23b..2a04c8a1 100644 --- a/Changelog.md +++ b/Changelog.md @@ -3,6 +3,9 @@ Bugfixes: * Type Checker: Report error when using indexed structs in events with experimental ABIEncoderV2. This used to log wrong values. * Type Checker: Report error when using structs in events without experimental ABIEncoderV2. This used to crash or log the wrong values. + * Parser: Treat unicode line endings as terminating strings and single-line comments. + * Parser: Disallow unterminated multi-line comments at the end of input. + * Parser: Treat ``/** /`` as unterminated multi-line comment. ### 0.4.24 (2018-05-16) diff --git a/libsolidity/parsing/Scanner.cpp b/libsolidity/parsing/Scanner.cpp index 6541f6c2..dbe1f389 100644 --- a/libsolidity/parsing/Scanner.cpp +++ b/libsolidity/parsing/Scanner.cpp @@ -243,22 +243,17 @@ bool Scanner::skipWhitespace() return sourcePos() != startPosition; } -bool Scanner::skipWhitespaceExceptLF() +void Scanner::skipWhitespaceExceptUnicodeLinebreak() { - int const startPosition = sourcePos(); - while (isWhiteSpace(m_char) && !isLineTerminator(m_char)) + while (isWhiteSpace(m_char) && !isUnicodeLinebreak()) advance(); - // Return whether or not we skipped any characters. - return sourcePos() != startPosition; } Token::Value Scanner::skipSingleLineComment() { - // The line terminator at the end of the line is not considered - // to be part of the single-line comment; it is recognized - // separately by the lexical grammar and becomes part of the - // stream of input elements for the syntactic grammar - while (!isLineTerminator(m_char)) + // Line terminator is not part of the comment. If it is a + // non-ascii line terminator, it will result in a parser error. + while (!isUnicodeLinebreak()) if (!advance()) break; return Token::Whitespace; @@ -268,7 +263,9 @@ Token::Value Scanner::scanSingleLineDocComment() { LiteralScope literal(this, LITERAL_TYPE_COMMENT); advance(); //consume the last '/' at /// - skipWhitespaceExceptLF(); + + skipWhitespaceExceptUnicodeLinebreak(); + while (!isSourcePastEndOfInput()) { if (isLineTerminator(m_char)) @@ -287,6 +284,10 @@ Token::Value Scanner::scanSingleLineDocComment() break; // next line is not a documentation comment, we are done } + else if (isUnicodeLinebreak()) + // Any line terminator that is not '\n' is considered to end the + // comment. + break; addCommentLiteralChar(m_char); advance(); } @@ -321,6 +322,9 @@ Token::Value Scanner::scanMultiLineDocComment() bool endFound = false; bool charsAdded = false; + while (isWhiteSpace(m_char) && !isLineTerminator(m_char)) + advance(); + while (!isSourcePastEndOfInput()) { //handle newlines in multline comments @@ -372,7 +376,7 @@ Token::Value Scanner::scanSlash() if (m_char == '/') { if (!advance()) /* double slash comment directly before EOS */ - return Token::Whitespace; + return Token::Whitespace; else if (m_char == '/') { // doxygen style /// comment @@ -390,24 +394,27 @@ Token::Value Scanner::scanSlash() { // doxygen style /** natspec comment if (!advance()) /* slash star comment before EOS */ - return Token::Whitespace; + return Token::Illegal; else if (m_char == '*') { advance(); //consume the last '*' at /** - skipWhitespaceExceptLF(); - // special case of a closed normal multiline comment - if (!m_source.isPastEndOfInput() && m_source.get(0) == '/') - advance(); //skip the closing slash - else // we actually have a multiline documentation comment + // "/**/" + if (m_char == '/') { - Token::Value comment; - m_nextSkippedComment.location.start = firstSlashPosition; - comment = scanMultiLineDocComment(); - m_nextSkippedComment.location.end = sourcePos(); - m_nextSkippedComment.token = comment; + advance(); //skip the closing slash + return Token::Whitespace; } - return Token::Whitespace; + // we actually have a multiline documentation comment + Token::Value comment; + m_nextSkippedComment.location.start = firstSlashPosition; + comment = scanMultiLineDocComment(); + m_nextSkippedComment.location.end = sourcePos(); + m_nextSkippedComment.token = comment; + if (comment == Token::Illegal) + return Token::Illegal; + else + return Token::Whitespace; } else return skipMultiLineComment(); @@ -435,11 +442,6 @@ void Scanner::scanToken() m_nextToken.location.start = sourcePos(); switch (m_char) { - case '\n': - case ' ': - case '\t': - token = selectToken(Token::Whitespace); - break; case '"': case '\'': token = scanString(); @@ -675,18 +677,38 @@ bool Scanner::scanEscape() if (!scanHexByte(c)) return false; break; + default: + return false; } addLiteralChar(c); return true; } +bool Scanner::isUnicodeLinebreak() +{ + if (0x0a <= m_char && m_char <= 0x0d) + // line feed, vertical tab, form feed, carriage return + return true; + else if (!m_source.isPastEndOfInput(1) && uint8_t(m_source.get(0)) == 0xc2 && uint8_t(m_source.get(1)) == 0x85) + // NEL - U+0085, C2 85 in utf8 + return true; + else if (!m_source.isPastEndOfInput(2) && uint8_t(m_source.get(0)) == 0xe2 && uint8_t(m_source.get(1)) == 0x80 && ( + uint8_t(m_source.get(2)) == 0xa8 || uint8_t(m_source.get(2)) == 0xa9 + )) + // LS - U+2028, E2 80 A8 in utf8 + // PS - U+2029, E2 80 A9 in utf8 + return true; + else + return false; +} + Token::Value Scanner::scanString() { char const quote = m_char; advance(); // consume quote LiteralScope literal(this, LITERAL_TYPE_STRING); - while (m_char != quote && !isSourcePastEndOfInput() && !isLineTerminator(m_char)) + while (m_char != quote && !isSourcePastEndOfInput() && !isUnicodeLinebreak()) { char c = m_char; advance(); @@ -710,7 +732,7 @@ Token::Value Scanner::scanHexString() char const quote = m_char; advance(); // consume quote LiteralScope literal(this, LITERAL_TYPE_STRING); - while (m_char != quote && !isSourcePastEndOfInput() && !isLineTerminator(m_char)) + while (m_char != quote && !isSourcePastEndOfInput()) { char c = m_char; if (!scanHexByte(c)) diff --git a/libsolidity/parsing/Scanner.h b/libsolidity/parsing/Scanner.h index 0adaa6fd..602532e4 100644 --- a/libsolidity/parsing/Scanner.h +++ b/libsolidity/parsing/Scanner.h @@ -197,8 +197,8 @@ private: /// Skips all whitespace and @returns true if something was skipped. bool skipWhitespace(); - /// Skips all whitespace except Line feeds and returns true if something was skipped - bool skipWhitespaceExceptLF(); + /// Skips all whitespace that are neither '\r' nor '\n'. + void skipWhitespaceExceptUnicodeLinebreak(); Token::Value skipSingleLineComment(); Token::Value skipMultiLineComment(); @@ -218,6 +218,9 @@ private: /// is scanned. bool scanEscape(); + /// @returns true iff we are currently positioned at a unicode line break. + bool isUnicodeLinebreak(); + /// Return the current source position. int sourcePos() const { return m_source.position(); } bool isSourcePastEndOfInput() const { return m_source.isPastEndOfInput(); } diff --git a/test/libsolidity/SolidityScanner.cpp b/test/libsolidity/SolidityScanner.cpp index 020bce7f..4ccc6788 100644 --- a/test/libsolidity/SolidityScanner.cpp +++ b/test/libsolidity/SolidityScanner.cpp @@ -23,6 +23,8 @@ #include <libsolidity/parsing/Scanner.h> #include <boost/test/unit_test.hpp> +using namespace std; + namespace dev { namespace solidity @@ -393,6 +395,110 @@ BOOST_AUTO_TEST_CASE(invalid_hex_literal_nonhex_string) BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); } +BOOST_AUTO_TEST_CASE(invalid_multiline_comment_close) +{ + // This used to parse as "comment", "identifier" + Scanner scanner(CharStream("/** / x")); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal); + BOOST_CHECK_EQUAL(scanner.next(), Token::EOS); +} + +BOOST_AUTO_TEST_CASE(multiline_doc_comment_at_eos) +{ + // This used to parse as "whitespace" + Scanner scanner(CharStream("/**")); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal); + BOOST_CHECK_EQUAL(scanner.next(), Token::EOS); +} + +BOOST_AUTO_TEST_CASE(multiline_comment_at_eos) +{ + Scanner scanner(CharStream("/*")); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal); + BOOST_CHECK_EQUAL(scanner.next(), Token::EOS); +} + +BOOST_AUTO_TEST_CASE(regular_line_break_in_single_line_comment) +{ + for (auto const& nl: {"\r", "\n"}) + { + Scanner scanner(CharStream("// abc " + string(nl) + " def ")); + BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), ""); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier); + BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def"); + BOOST_CHECK_EQUAL(scanner.next(), Token::EOS); + } +} + +BOOST_AUTO_TEST_CASE(irregular_line_breaks_in_single_line_comment) +{ + for (auto const& nl: {"\v", "\f", "\xE2\x80\xA8", "\xE2\x80\xA9"}) + { + Scanner scanner(CharStream("// abc " + string(nl) + " def ")); + BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), ""); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal); + for (size_t i = 0; i < string(nl).size() - 1; i++) + BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); + BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier); + BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def"); + BOOST_CHECK_EQUAL(scanner.next(), Token::EOS); + } +} + +BOOST_AUTO_TEST_CASE(regular_line_breaks_in_single_line_doc_comment) +{ + for (auto const& nl: {"\r", "\n"}) + { + Scanner scanner(CharStream("/// abc " + string(nl) + " def ")); + BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), "abc "); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier); + BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def"); + BOOST_CHECK_EQUAL(scanner.next(), Token::EOS); + } +} + +BOOST_AUTO_TEST_CASE(irregular_line_breaks_in_single_line_doc_comment) +{ + for (auto const& nl: {"\v", "\f", "\xE2\x80\xA8", "\xE2\x80\xA9"}) + { + Scanner scanner(CharStream("/// abc " + string(nl) + " def ")); + BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), "abc "); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal); + for (size_t i = 0; i < string(nl).size() - 1; i++) + BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); + BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier); + BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def"); + BOOST_CHECK_EQUAL(scanner.next(), Token::EOS); + } +} + +BOOST_AUTO_TEST_CASE(regular_line_breaks_in_strings) +{ + for (auto const& nl: {"\n", "\r"}) + { + Scanner scanner(CharStream("\"abc " + string(nl) + " def\"")); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal); + BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier); + BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def"); + BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); + BOOST_CHECK_EQUAL(scanner.next(), Token::EOS); + } +} + +BOOST_AUTO_TEST_CASE(irregular_line_breaks_in_strings) +{ + for (auto const& nl: {"\v", "\f", "\xE2\x80\xA8", "\xE2\x80\xA9"}) + { + Scanner scanner(CharStream("\"abc " + string(nl) + " def\"")); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal); + for (size_t i = 0; i < string(nl).size(); i++) + BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); + BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier); + BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def"); + BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); + BOOST_CHECK_EQUAL(scanner.next(), Token::EOS); + } +} BOOST_AUTO_TEST_SUITE_END() |