diff options
author | chriseth <chris@ethereum.org> | 2016-08-05 14:47:52 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-08-05 14:47:52 +0800 |
commit | 3c93a22d478b9439d6e226ad7954fe571117f439 (patch) | |
tree | 687cb5a48bfbd8ea356444cc9034978345229155 | |
parent | ff60ce988b29c792eacae6e0ec1501ae7fb50b19 (diff) | |
parent | b881dbb29200558d93c642ef41950d9d43a7e679 (diff) | |
download | dexon-solidity-3c93a22d478b9439d6e226ad7954fe571117f439.tar.gz dexon-solidity-3c93a22d478b9439d6e226ad7954fe571117f439.tar.zst dexon-solidity-3c93a22d478b9439d6e226ad7954fe571117f439.zip |
Merge pull request #666 from axic/feature/unicode-escape
Support unicode escape characters
-rw-r--r-- | docs/types.rst | 4 | ||||
-rw-r--r-- | libsolidity/parsing/Scanner.cpp | 43 | ||||
-rw-r--r-- | libsolidity/parsing/Scanner.h | 2 | ||||
-rw-r--r-- | test/libsolidity/SolidityScanner.cpp | 40 |
4 files changed, 88 insertions, 1 deletion
diff --git a/docs/types.rst b/docs/types.rst index 35f0e247..0c5aaf1b 100644 --- a/docs/types.rst +++ b/docs/types.rst @@ -214,7 +214,9 @@ a non-rational number). String Literals --------------- -String Literals are written with double quotes (``"abc"``). As with integer literals, their type can vary, but they are implicitly convertible to ``bytes`` if they fit, to ``bytes`` and to ``string``. +String Literals are written with double quotes (``"abc"``). As with integer literals, their type can vary, but they are implicitly convertible to ``bytes1``, ..., ``bytes32`` if they fit, to ``bytes`` and to ``string``. + +String Literals support escape characters, such as ``\n``, ``\xNN`` and ``\uNNNN``. ``\xNN`` takes a hex value and inserts the appropriate byte, while ``\uNNNN`` takes a Unicode codepoint and inserts an UTF8 sequence. .. index:: enum diff --git a/libsolidity/parsing/Scanner.cpp b/libsolidity/parsing/Scanner.cpp index d630d0ab..d730210a 100644 --- a/libsolidity/parsing/Scanner.cpp +++ b/libsolidity/parsing/Scanner.cpp @@ -177,6 +177,41 @@ bool Scanner::scanHexByte(char& o_scannedByte) return true; } +bool Scanner::scanUnicode(unsigned & o_codepoint) +{ + unsigned x = 0; + for (int i = 0; i < 4; i++) + { + int d = hexValue(m_char); + if (d < 0) + { + rollback(i); + return false; + } + x = x * 16 + d; + advance(); + } + o_codepoint = x; + return true; +} + +// This supports codepoints between 0000 and FFFF. +void Scanner::addUnicodeAsUTF8(unsigned codepoint) +{ + if (codepoint <= 0x7f) + addLiteralChar(codepoint); + else if (codepoint <= 0x7ff) + { + addLiteralChar(0xc0 | (codepoint >> 6)); + addLiteralChar(0x80 | (codepoint & 0x3f)); + } + else + { + addLiteralChar(0xe0 | (codepoint >> 12)); + addLiteralChar(0x80 | ((codepoint >> 6) & 0x3f)); + addLiteralChar(0x80 | (codepoint & 0x3f)); + } +} // Ensure that tokens can be stored in a byte. 
BOOST_STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); @@ -607,6 +642,14 @@ bool Scanner::scanEscape() case 'v': c = '\v'; break; + case 'u': + { + unsigned codepoint; + if (!scanUnicode(codepoint)) + return false; + addUnicodeAsUTF8(codepoint); + return true; + } case 'x': if (!scanHexByte(c)) return false; diff --git a/libsolidity/parsing/Scanner.h b/libsolidity/parsing/Scanner.h index cd60aff8..708adf8f 100644 --- a/libsolidity/parsing/Scanner.h +++ b/libsolidity/parsing/Scanner.h @@ -175,6 +175,7 @@ private: inline void addLiteralChar(char c) { m_nextToken.literal.push_back(c); } inline void addCommentLiteralChar(char c) { m_nextSkippedComment.literal.push_back(c); } inline void addLiteralCharAndAdvance() { addLiteralChar(m_char); advance(); } + void addUnicodeAsUTF8(unsigned codepoint); ///@} bool advance() { m_char = m_source.advanceAndGet(); return !m_source.isPastEndOfInput(); } @@ -185,6 +186,7 @@ private: inline Token::Value selectToken(char _next, Token::Value _then, Token::Value _else); bool scanHexByte(char& o_scannedByte); + bool scanUnicode(unsigned& o_codepoint); /// Scans a single Solidity token. 
void scanToken(); diff --git a/test/libsolidity/SolidityScanner.cpp b/test/libsolidity/SolidityScanner.cpp index 10f30b8e..4443b9f6 100644 --- a/test/libsolidity/SolidityScanner.cpp +++ b/test/libsolidity/SolidityScanner.cpp @@ -291,6 +291,46 @@ BOOST_AUTO_TEST_CASE(empty_comment) } +BOOST_AUTO_TEST_CASE(valid_unicode_string_escape) +{ + Scanner scanner(CharStream("{ \"\\u00DAnicode\"")); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); + BOOST_CHECK_EQUAL(scanner.next(), Token::StringLiteral); + BOOST_CHECK_EQUAL(scanner.currentLiteral(), std::string("\xC3\x9Anicode", 8)); +} + +BOOST_AUTO_TEST_CASE(valid_unicode_string_escape_7f) +{ + Scanner scanner(CharStream("{ \"\\u007Fnicode\"")); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); + BOOST_CHECK_EQUAL(scanner.next(), Token::StringLiteral); + BOOST_CHECK_EQUAL(scanner.currentLiteral(), std::string("\x7Fnicode", 7)); +} + +BOOST_AUTO_TEST_CASE(valid_unicode_string_escape_7ff) +{ + Scanner scanner(CharStream("{ \"\\u07FFnicode\"")); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); + BOOST_CHECK_EQUAL(scanner.next(), Token::StringLiteral); + BOOST_CHECK_EQUAL(scanner.currentLiteral(), std::string("\xDF\xBFnicode", 8)); +} + +BOOST_AUTO_TEST_CASE(valid_unicode_string_escape_ffff) +{ + Scanner scanner(CharStream("{ \"\\uFFFFnicode\"")); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); + BOOST_CHECK_EQUAL(scanner.next(), Token::StringLiteral); + BOOST_CHECK_EQUAL(scanner.currentLiteral(), std::string("\xEF\xBF\xBFnicode", 9)); +} + +BOOST_AUTO_TEST_CASE(invalid_short_unicode_string_escape) +{ + Scanner scanner(CharStream("{ \"\\uFFnicode\"")); + BOOST_CHECK_EQUAL(scanner.currentToken(), Token::LBrace); + BOOST_CHECK_EQUAL(scanner.next(), Token::Illegal); +} + + BOOST_AUTO_TEST_SUITE_END() } |