about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author chriseth <chris@ethereum.org> 2018-09-10 19:00:03 +0800
committer GitHub <noreply@github.com> 2018-09-10 19:00:03 +0800
commit b9164eaba2a192428a27bac045c529c438599af7 (patch)
tree bdc128a766c64667537ddfe8fbe9bea55025aaea
parent a1848ac9470a97bf2f158b91a558391ee9f70c11 (diff)
parent 55e4532c7231ea7f4ab54402ebac84406564b64b (diff)
download dexon-solidity-b9164eaba2a192428a27bac045c529c438599af7.tar.gz
dexon-solidity-b9164eaba2a192428a27bac045c529c438599af7.tar.zst
dexon-solidity-b9164eaba2a192428a27bac045c529c438599af7.zip
Merge pull request #4937 from ethereum/fixNewline_0425
[backport] Fix newline bugs
-rw-r--r-- Changelog.md 3
-rw-r--r-- libsolidity/parsing/Scanner.cpp 84
-rw-r--r-- libsolidity/parsing/Scanner.h 7
-rw-r--r-- test/libsolidity/SolidityScanner.cpp 106
4 files changed, 167 insertions, 33 deletions
diff --git a/Changelog.md b/Changelog.md
index 6397b23b..2a04c8a1 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -3,6 +3,9 @@
Bugfixes:
* Type Checker: Report error when using indexed structs in events with experimental ABIEncoderV2. This used to log wrong values.
* Type Checker: Report error when using structs in events without experimental ABIEncoderV2. This used to crash or log the wrong values.
+ * Parser: Treat unicode line endings as terminating strings and single-line comments.
+ * Parser: Disallow unterminated multi-line comments at the end of input.
+ * Parser: Treat ``/** /`` as unterminated multi-line comment.
### 0.4.24 (2018-05-16)
diff --git a/libsolidity/parsing/Scanner.cpp b/libsolidity/parsing/Scanner.cpp
index 6541f6c2..dbe1f389 100644
--- a/libsolidity/parsing/Scanner.cpp
+++ b/libsolidity/parsing/Scanner.cpp
@@ -243,22 +243,17 @@ bool Scanner::skipWhitespace()
return sourcePos() != startPosition;
}
-bool Scanner::skipWhitespaceExceptLF()
+void Scanner::skipWhitespaceExceptUnicodeLinebreak()
{
- int const startPosition = sourcePos();
- while (isWhiteSpace(m_char) && !isLineTerminator(m_char))
+ while (isWhiteSpace(m_char) && !isUnicodeLinebreak())
advance();
- // Return whether or not we skipped any characters.
- return sourcePos() != startPosition;
}
Token::Value Scanner::skipSingleLineComment()
{
- // The line terminator at the end of the line is not considered
- // to be part of the single-line comment; it is recognized
- // separately by the lexical grammar and becomes part of the
- // stream of input elements for the syntactic grammar
- while (!isLineTerminator(m_char))
+ // Line terminator is not part of the comment. If it is a
+ // non-ascii line terminator, it will result in a parser error.
+ while (!isUnicodeLinebreak())
if (!advance()) break;
return Token::Whitespace;
@@ -268,7 +263,9 @@ Token::Value Scanner::scanSingleLineDocComment()
{
LiteralScope literal(this, LITERAL_TYPE_COMMENT);
advance(); //consume the last '/' at ///
- skipWhitespaceExceptLF();
+
+ skipWhitespaceExceptUnicodeLinebreak();
+
while (!isSourcePastEndOfInput())
{
if (isLineTerminator(m_char))
@@ -287,6 +284,10 @@ Token::Value Scanner::scanSingleLineDocComment()
break; // next line is not a documentation comment, we are done
}
+ else if (isUnicodeLinebreak())
+ // Any line terminator that is not '\n' is considered to end the
+ // comment.
+ break;
addCommentLiteralChar(m_char);
advance();
}
@@ -321,6 +322,9 @@ Token::Value Scanner::scanMultiLineDocComment()
bool endFound = false;
bool charsAdded = false;
+ while (isWhiteSpace(m_char) && !isLineTerminator(m_char))
+ advance();
+
while (!isSourcePastEndOfInput())
{
//handle newlines in multline comments
@@ -372,7 +376,7 @@ Token::Value Scanner::scanSlash()
if (m_char == '/')
{
if (!advance()) /* double slash comment directly before EOS */
- return Token::Whitespace;
+ return Token::Whitespace;
else if (m_char == '/')
{
// doxygen style /// comment
@@ -390,24 +394,27 @@ Token::Value Scanner::scanSlash()
{
// doxygen style /** natspec comment
if (!advance()) /* slash star comment before EOS */
- return Token::Whitespace;
+ return Token::Illegal;
else if (m_char == '*')
{
advance(); //consume the last '*' at /**
- skipWhitespaceExceptLF();
- // special case of a closed normal multiline comment
- if (!m_source.isPastEndOfInput() && m_source.get(0) == '/')
- advance(); //skip the closing slash
- else // we actually have a multiline documentation comment
+ // "/**/"
+ if (m_char == '/')
{
- Token::Value comment;
- m_nextSkippedComment.location.start = firstSlashPosition;
- comment = scanMultiLineDocComment();
- m_nextSkippedComment.location.end = sourcePos();
- m_nextSkippedComment.token = comment;
+ advance(); //skip the closing slash
+ return Token::Whitespace;
}
- return Token::Whitespace;
+ // we actually have a multiline documentation comment
+ Token::Value comment;
+ m_nextSkippedComment.location.start = firstSlashPosition;
+ comment = scanMultiLineDocComment();
+ m_nextSkippedComment.location.end = sourcePos();
+ m_nextSkippedComment.token = comment;
+ if (comment == Token::Illegal)
+ return Token::Illegal;
+ else
+ return Token::Whitespace;
}
else
return skipMultiLineComment();
@@ -435,11 +442,6 @@ void Scanner::scanToken()
m_nextToken.location.start = sourcePos();
switch (m_char)
{
- case '\n':
- case ' ':
- case '\t':
- token = selectToken(Token::Whitespace);
- break;
case '"':
case '\'':
token = scanString();
@@ -675,18 +677,38 @@ bool Scanner::scanEscape()
if (!scanHexByte(c))
return false;
break;
+ default:
+ return false;
}
addLiteralChar(c);
return true;
}
+bool Scanner::isUnicodeLinebreak()
+{
+ if (0x0a <= m_char && m_char <= 0x0d)
+ // line feed, vertical tab, form feed, carriage return
+ return true;
+ else if (!m_source.isPastEndOfInput(1) && uint8_t(m_source.get(0)) == 0xc2 && uint8_t(m_source.get(1)) == 0x85)
+ // NEL - U+0085, C2 85 in utf8
+ return true;
+ else if (!m_source.isPastEndOfInput(2) && uint8_t(m_source.get(0)) == 0xe2 && uint8_t(m_source.get(1)) == 0x80 && (
+ uint8_t(m_source.get(2)) == 0xa8 || uint8_t(m_source.get(2)) == 0xa9
+ ))
+ // LS - U+2028, E2 80 A8 in utf8
+ // PS - U+2029, E2 80 A9 in utf8
+ return true;
+ else
+ return false;
+}
+
Token::Value Scanner::scanString()
{
char const quote = m_char;
advance(); // consume quote
LiteralScope literal(this, LITERAL_TYPE_STRING);
- while (m_char != quote && !isSourcePastEndOfInput() && !isLineTerminator(m_char))
+ while (m_char != quote && !isSourcePastEndOfInput() && !isUnicodeLinebreak())
{
char c = m_char;
advance();
@@ -710,7 +732,7 @@ Token::Value Scanner::scanHexString()
char const quote = m_char;
advance(); // consume quote
LiteralScope literal(this, LITERAL_TYPE_STRING);
- while (m_char != quote && !isSourcePastEndOfInput() && !isLineTerminator(m_char))
+ while (m_char != quote && !isSourcePastEndOfInput())
{
char c = m_char;
if (!scanHexByte(c))
diff --git a/libsolidity/parsing/Scanner.h b/libsolidity/parsing/Scanner.h
index 0adaa6fd..602532e4 100644
--- a/libsolidity/parsing/Scanner.h
+++ b/libsolidity/parsing/Scanner.h
@@ -197,8 +197,8 @@ private:
/// Skips all whitespace and @returns true if something was skipped.
bool skipWhitespace();
- /// Skips all whitespace except Line feeds and returns true if something was skipped
- bool skipWhitespaceExceptLF();
+ /// Skips all whitespace that are neither '\r' nor '\n'.
+ void skipWhitespaceExceptUnicodeLinebreak();
Token::Value skipSingleLineComment();
Token::Value skipMultiLineComment();
@@ -218,6 +218,9 @@ private:
/// is scanned.
bool scanEscape();
+ /// @returns true iff we are currently positioned at a unicode line break.
+ bool isUnicodeLinebreak();
+
/// Return the current source position.
int sourcePos() const { return m_source.position(); }
bool isSourcePastEndOfInput() const { return m_source.isPastEndOfInput(); }
diff --git a/test/libsolidity/SolidityScanner.cpp b/test/libsolidity/SolidityScanner.cpp
index 020bce7f..4ccc6788 100644
--- a/test/libsolidity/SolidityScanner.cpp
+++ b/test/libsolidity/SolidityScanner.cpp
@@ -23,6 +23,8 @@
#include <libsolidity/parsing/Scanner.h>
#include <boost/test/unit_test.hpp>
+using namespace std;
+
namespace dev
{
namespace solidity
@@ -393,6 +395,110 @@ BOOST_AUTO_TEST_CASE(invalid_hex_literal_nonhex_string)
BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
}
+BOOST_AUTO_TEST_CASE(invalid_multiline_comment_close)
+{
+ // This used to parse as "comment", "identifier"
+ Scanner scanner(CharStream("/** / x"));
+ BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
+ BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
+}
+
+BOOST_AUTO_TEST_CASE(multiline_doc_comment_at_eos)
+{
+ // This used to parse as "whitespace"
+ Scanner scanner(CharStream("/**"));
+ BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
+ BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
+}
+
+BOOST_AUTO_TEST_CASE(multiline_comment_at_eos)
+{
+ Scanner scanner(CharStream("/*"));
+ BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
+ BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
+}
+
+BOOST_AUTO_TEST_CASE(regular_line_break_in_single_line_comment)
+{
+ for (auto const& nl: {"\r", "\n"})
+ {
+ Scanner scanner(CharStream("// abc " + string(nl) + " def "));
+ BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), "");
+ BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier);
+ BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def");
+ BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
+ }
+}
+
+BOOST_AUTO_TEST_CASE(irregular_line_breaks_in_single_line_comment)
+{
+ for (auto const& nl: {"\v", "\f", "\xE2\x80\xA8", "\xE2\x80\xA9"})
+ {
+ Scanner scanner(CharStream("// abc " + string(nl) + " def "));
+ BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), "");
+ BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
+ for (size_t i = 0; i < string(nl).size() - 1; i++)
+ BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
+ BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
+ BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def");
+ BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
+ }
+}
+
+BOOST_AUTO_TEST_CASE(regular_line_breaks_in_single_line_doc_comment)
+{
+ for (auto const& nl: {"\r", "\n"})
+ {
+ Scanner scanner(CharStream("/// abc " + string(nl) + " def "));
+ BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), "abc ");
+ BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Identifier);
+ BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def");
+ BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
+ }
+}
+
+BOOST_AUTO_TEST_CASE(irregular_line_breaks_in_single_line_doc_comment)
+{
+ for (auto const& nl: {"\v", "\f", "\xE2\x80\xA8", "\xE2\x80\xA9"})
+ {
+ Scanner scanner(CharStream("/// abc " + string(nl) + " def "));
+ BOOST_CHECK_EQUAL(scanner.currentCommentLiteral(), "abc ");
+ BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
+ for (size_t i = 0; i < string(nl).size() - 1; i++)
+ BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
+ BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
+ BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def");
+ BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
+ }
+}
+
+BOOST_AUTO_TEST_CASE(regular_line_breaks_in_strings)
+{
+ for (auto const& nl: {"\n", "\r"})
+ {
+ Scanner scanner(CharStream("\"abc " + string(nl) + " def\""));
+ BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
+ BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
+ BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def");
+ BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
+ BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
+ }
+}
+
+BOOST_AUTO_TEST_CASE(irregular_line_breaks_in_strings)
+{
+ for (auto const& nl: {"\v", "\f", "\xE2\x80\xA8", "\xE2\x80\xA9"})
+ {
+ Scanner scanner(CharStream("\"abc " + string(nl) + " def\""));
+ BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
+ for (size_t i = 0; i < string(nl).size(); i++)
+ BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
+ BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
+ BOOST_CHECK_EQUAL(scanner.currentLiteral(), "def");
+ BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal);
+ BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
+ }
+}
BOOST_AUTO_TEST_SUITE_END()