aboutsummaryrefslogtreecommitdiffstats
path: root/libsolidity/parsing/Scanner.cpp
diff options
context:
space:
mode:
author: Alex Beregszaszi <alex@rtfs.hu> 2016-06-08 02:23:19 +0800
committer: Alex Beregszaszi <alex@rtfs.hu> 2016-08-05 00:28:48 +0800
commit: aa4593cab3d60468e5ea4318012c5252ebbc7d13 (patch)
tree: b54fca8c8b8f4d816bd9423a7e090b0026b9c7a9 /libsolidity/parsing/Scanner.cpp
parent: b83acfae5964dd1ebb10a40e00940596a388f3d7 (diff)
download: dexon-solidity-aa4593cab3d60468e5ea4318012c5252ebbc7d13.tar.gz
dexon-solidity-aa4593cab3d60468e5ea4318012c5252ebbc7d13.tar.zst
dexon-solidity-aa4593cab3d60468e5ea4318012c5252ebbc7d13.zip
Support Unicode escape characters in string literals ('\uUUUU')
Fixes #638
Diffstat (limited to 'libsolidity/parsing/Scanner.cpp')
-rw-r--r-- libsolidity/parsing/Scanner.cpp 43
1 files changed, 43 insertions, 0 deletions
diff --git a/libsolidity/parsing/Scanner.cpp b/libsolidity/parsing/Scanner.cpp
index d630d0ab..616e6a96 100644
--- a/libsolidity/parsing/Scanner.cpp
+++ b/libsolidity/parsing/Scanner.cpp
@@ -177,6 +177,41 @@ bool Scanner::scanHexByte(char& o_scannedByte)
return true;
}
+/// Reads exactly four hexadecimal digits, starting at the current character
+/// (m_char), and combines them most-significant-digit first.
+/// @param o_codepoint receives the combined value (0x0000..0xFFFF) on success;
+///        it is left untouched on failure.
+/// @returns true if all four characters were hex digits, false otherwise.
+bool Scanner::scanUnicode(unsigned & o_codepoint)
+{
+	unsigned value = 0;
+	int consumed = 0;
+	while (consumed < 4)
+	{
+		int const digit = hexValue(m_char);
+		if (digit < 0)
+		{
+			// Not a hex digit: pass the number of digits already stepped over to
+			// rollback() (presumably rewinding the scanner by that many characters).
+			rollback(consumed);
+			return false;
+		}
+		value = value * 16 + digit;
+		advance();
+		++consumed;
+	}
+	o_codepoint = value;
+	return true;
+}
+
+/// Appends @a codepoint to the literal being scanned, encoded as UTF-8, issuing
+/// one addLiteralChar() call per output byte.
+/// One- to four-byte sequences are produced, i.e. 0x0000..0x10FFFF. The four hex
+/// digits read by scanUnicode() never exceed 0xFFFF, so the four-byte branch is
+/// only reached by callers that pass larger values.
+/// Values above 0x10FFFF are not valid Unicode and are not validated here; the
+/// lead byte is masked so that every emitted value still fits into one byte.
+/// NOTE(review): surrogate halves (0xD800..0xDFFF) are encoded as-is in three
+/// bytes, which is not well-formed UTF-8; an escaped surrogate pair is not
+/// combined into a single code point by this function.
+void Scanner::addUnicodeChar(unsigned codepoint)
+{
+	if (codepoint <= 0x7f)
+		// Single byte: 0xxxxxxx
+		addLiteralChar(codepoint);
+	else if (codepoint <= 0x7ff)
+	{
+		// Two bytes: 110xxxxx 10xxxxxx
+		addLiteralChar(0xc0 | (codepoint >> 6));
+		addLiteralChar(0x80 | (codepoint & 0x3f));
+	}
+	else if (codepoint <= 0xffff)
+	{
+		// Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
+		addLiteralChar(0xe0 | (codepoint >> 12));
+		addLiteralChar(0x80 | ((codepoint >> 6) & 0x3f));
+		addLiteralChar(0x80 | (codepoint & 0x3f));
+	}
+	else
+	{
+		// Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+		addLiteralChar(0xf0 | ((codepoint >> 18) & 0x07));
+		addLiteralChar(0x80 | ((codepoint >> 12) & 0x3f));
+		addLiteralChar(0x80 | ((codepoint >> 6) & 0x3f));
+		addLiteralChar(0x80 | (codepoint & 0x3f));
+	}
+}
// Ensure that tokens can be stored in a byte.
BOOST_STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
@@ -607,6 +642,14 @@ bool Scanner::scanEscape()
case 'v':
c = '\v';
break;
+ case 'u':
+ {
+ unsigned codepoint;
+ if (!scanUnicode(codepoint))
+ return false;
+ addUnicodeChar(codepoint);
+ return true;
+ }
case 'x':
if (!scanHexByte(c))
return false;