aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--docs/types.rst4
-rw-r--r--libsolidity/parsing/Scanner.cpp43
-rw-r--r--libsolidity/parsing/Scanner.h2
-rw-r--r--test/libsolidity/SolidityScanner.cpp40
4 files changed, 88 insertions, 1 deletions
diff --git a/docs/types.rst b/docs/types.rst
index 35f0e247..0c5aaf1b 100644
--- a/docs/types.rst
+++ b/docs/types.rst
@@ -214,7 +214,9 @@ a non-rational number).
String Literals
---------------
-String Literals are written with double quotes (``"abc"``). As with integer literals, their type can vary, but they are implicitly convertible to ``bytes`` if they fit, to ``bytes`` and to ``string``.
+String Literals are written with double quotes (``"abc"``). As with integer literals, their type can vary, but they are implicitly convertible to ``bytes1``, ..., ``bytes32`` if they fit, to ``bytes`` and to ``string``.
+
+String Literals support escape characters, such as ``\n``, ``\xNN`` and ``\uNNNN``. ``\xNN`` takes a hex value and inserts the appropriate byte, while ``\uNNNN`` takes a Unicode codepoint and inserts a UTF-8 sequence.
.. index:: enum
diff --git a/libsolidity/parsing/Scanner.cpp b/libsolidity/parsing/Scanner.cpp
index d630d0ab..d730210a 100644
--- a/libsolidity/parsing/Scanner.cpp
+++ b/libsolidity/parsing/Scanner.cpp
@@ -177,6 +177,41 @@ bool Scanner::scanHexByte(char& o_scannedByte)
return true;
}
+/// Reads exactly four hexadecimal digits, starting at the current character,
+/// and stores the accumulated value in @a o_codepoint.
+/// @returns true if all four digits were read. If hexValue() reports a negative
+/// value for any of them, the digits consumed so far are handed to rollback(),
+/// @a o_codepoint is left untouched and false is returned.
+bool Scanner::scanUnicode(unsigned & o_codepoint)
+{
+	unsigned value = 0;
+	int consumed = 0;
+	while (consumed < 4)
+	{
+		int const digit = hexValue(m_char);
+		if (digit < 0)
+		{
+			// Undo the digits already consumed by this call before giving up.
+			rollback(consumed);
+			return false;
+		}
+		value = value * 16 + digit;
+		advance();
+		++consumed;
+	}
+	o_codepoint = value;
+	return true;
+}
+
+/// Appends the UTF-8 encoding of @a codepoint to the literal currently being built.
+/// Only codepoints between 0x0000 and 0xFFFF are supported, i.e. encodings of
+/// one, two or three bytes.
+void Scanner::addUnicodeAsUTF8(unsigned codepoint)
+{
+	if (codepoint > 0x7ff)
+	{
+		// Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
+		addLiteralChar(0xe0 | (codepoint >> 12));
+		addLiteralChar(0x80 | ((codepoint >> 6) & 0x3f));
+		addLiteralChar(0x80 | (codepoint & 0x3f));
+		return;
+	}
+	if (codepoint > 0x7f)
+	{
+		// Two bytes: 110xxxxx 10xxxxxx
+		addLiteralChar(0xc0 | (codepoint >> 6));
+		addLiteralChar(0x80 | (codepoint & 0x3f));
+		return;
+	}
+	// Values up to 0x7f are emitted unchanged as a single byte.
+	addLiteralChar(codepoint);
+}
// Ensure that tokens can be stored in a byte.
BOOST_STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
@@ -607,6 +642,14 @@ bool Scanner::scanEscape()
case 'v':
c = '\v';
break;
+ case 'u':
+ {
+ unsigned codepoint;
+ if (!scanUnicode(codepoint))
+ return false;
+ addUnicodeAsUTF8(codepoint);
+ return true;
+ }
case 'x':
if (!scanHexByte(c))
return false;
diff --git a/libsolidity/parsing/Scanner.h b/libsolidity/parsing/Scanner.h
index cd60aff8..708adf8f 100644
--- a/libsolidity/parsing/Scanner.h
+++ b/libsolidity/parsing/Scanner.h
@@ -175,6 +175,7 @@ private:
inline void addLiteralChar(char c) { m_nextToken.literal.push_back(c); }
inline void addCommentLiteralChar(char c) { m_nextSkippedComment.literal.push_back(c); }
inline void addLiteralCharAndAdvance() { addLiteralChar(m_char); advance(); }
+ void addUnicodeAsUTF8(unsigned codepoint);
///@}
bool advance() { m_char = m_source.advanceAndGet(); return !m_source.isPastEndOfInput(); }
@@ -185,6 +186,7 @@ private:
inline Token::Value selectToken(char _next, Token::Value _then, Token::Value _else);
bool scanHexByte(char& o_scannedByte);
+ bool scanUnicode(unsigned& o_codepoint);
/// Scans a single Solidity token.
void scanToken();
diff --git a/test/libsolidity/SolidityScanner.cpp b/test/libsolidity/SolidityScanner.cpp
index 10f30b8e..4443b9f6 100644
--- a/test/libsolidity/SolidityScanner.cpp
+++ b/test/libsolidity/SolidityScanner.cpp
@@ -291,6 +291,46 @@ BOOST_AUTO_TEST_CASE(empty_comment)
}
+BOOST_AUTO_TEST_CASE(valid_unicode_string_escape)
+{
+	// \u00DA lies in the two-byte range and must come out as the bytes C3 9A.
+	std::string const expected("\xC3\x9Anicode", 8);
+	Scanner scanner(CharStream("{ \"\\u00DAnicode\""));
+	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::StringLiteral);
+	BOOST_CHECK_EQUAL(scanner.currentLiteral(), expected);
+}
+
+BOOST_AUTO_TEST_CASE(valid_unicode_string_escape_7f)
+{
+	// \u007F is the largest codepoint that is still emitted as a single byte.
+	std::string const expected("\x7Fnicode", 7);
+	Scanner scanner(CharStream("{ \"\\u007Fnicode\""));
+	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::StringLiteral);
+	BOOST_CHECK_EQUAL(scanner.currentLiteral(), expected);
+}
+
+BOOST_AUTO_TEST_CASE(valid_unicode_string_escape_7ff)
+{
+	// \u07FF is the largest codepoint of the two-byte range: DF BF.
+	std::string const expected("\xDF\xBFnicode", 8);
+	Scanner scanner(CharStream("{ \"\\u07FFnicode\""));
+	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::StringLiteral);
+	BOOST_CHECK_EQUAL(scanner.currentLiteral(), expected);
+}
+
+BOOST_AUTO_TEST_CASE(valid_unicode_string_escape_ffff)
+{
+	// \uFFFF is the largest codepoint a four-digit escape can express: EF BF BF.
+	std::string const expected("\xEF\xBF\xBFnicode", 9);
+	Scanner scanner(CharStream("{ \"\\uFFFFnicode\""));
+	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::StringLiteral);
+	BOOST_CHECK_EQUAL(scanner.currentLiteral(), expected);
+}
+
+BOOST_AUTO_TEST_CASE(invalid_short_unicode_string_escape)
+{
+	// Only two hex digits follow \u here, so the string must be rejected.
+	Scanner scanner(CharStream("{ \"\\uFFnicode\""));
+	auto const opening = scanner.currentToken();
+	BOOST_CHECK_EQUAL(opening, Token::LBrace);
+	auto const literal = scanner.next();
+	BOOST_CHECK_EQUAL(literal, Token::Illegal);
+}
+
+
BOOST_AUTO_TEST_SUITE_END()
}