From aa4593cab3d60468e5ea4318012c5252ebbc7d13 Mon Sep 17 00:00:00 2001 From: Alex Beregszaszi Date: Tue, 7 Jun 2016 19:23:19 +0100 Subject: Support Unicode escape characters in string literals ('\uUUUU') Fixes #638 --- libsolidity/parsing/Scanner.cpp | 43 +++++++++++++++++++++++++++++++++++++++++ libsolidity/parsing/Scanner.h | 2 ++ 2 files changed, 45 insertions(+) (limited to 'libsolidity') diff --git a/libsolidity/parsing/Scanner.cpp b/libsolidity/parsing/Scanner.cpp index d630d0ab..616e6a96 100644 --- a/libsolidity/parsing/Scanner.cpp +++ b/libsolidity/parsing/Scanner.cpp @@ -177,6 +177,41 @@ bool Scanner::scanHexByte(char& o_scannedByte) return true; } +bool Scanner::scanUnicode(unsigned & o_codepoint) +{ + unsigned x = 0; + for (int i = 0; i < 4; i++) + { + int d = hexValue(m_char); + if (d < 0) + { + rollback(i); + return false; + } + x = x * 16 + d; + advance(); + } + o_codepoint = x; + return true; +} + +// This supports codepoints between 0000 and FFFF. +void Scanner::addUnicodeChar(unsigned codepoint) +{ + if (codepoint <= 0x7f) + addLiteralChar(codepoint); + else if (codepoint <= 0x7ff) + { + addLiteralChar(0xc0 | (codepoint >> 6)); + addLiteralChar(0x80 | (codepoint & 0x3f)); + } + else + { + addLiteralChar(0xe0 | (codepoint >> 12)); + addLiteralChar(0x80 | ((codepoint >> 6) & 0x3f)); + addLiteralChar(0x80 | (codepoint & 0x3f)); + } +} // Ensure that tokens can be stored in a byte. BOOST_STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); @@ -607,6 +642,14 @@ bool Scanner::scanEscape() case 'v': c = '\v'; break; + case 'u': + { + unsigned codepoint; + if (!scanUnicode(codepoint)) + return false; + addUnicodeChar(codepoint); + return true; + } case 'x': if (!scanHexByte(c)) return false; diff --git a/libsolidity/parsing/Scanner.h b/libsolidity/parsing/Scanner.h index cd60aff8..3dde42b3 100644 --- a/libsolidity/parsing/Scanner.h +++ b/libsolidity/parsing/Scanner.h @@ -175,6 +175,7 @@ private: inline void addLiteralChar(char c) { m_nextToken.literal.push_back(c); } inline void addCommentLiteralChar(char c) { m_nextSkippedComment.literal.push_back(c); } inline void addLiteralCharAndAdvance() { addLiteralChar(m_char); advance(); } + void addUnicodeChar(unsigned codepoint); ///@} bool advance() { m_char = m_source.advanceAndGet(); return !m_source.isPastEndOfInput(); } @@ -185,6 +186,7 @@ private: inline Token::Value selectToken(char _next, Token::Value _then, Token::Value _else); bool scanHexByte(char& o_scannedByte); + bool scanUnicode(unsigned& o_codepoint); /// Scans a single Solidity token. void scanToken(); -- cgit From 6db12c4f882411293da71c5951ba7088a9712c13 Mon Sep 17 00:00:00 2001 From: Alex Beregszaszi Date: Mon, 1 Aug 2016 14:10:46 +0100 Subject: Rename addUnicodeChar to addUnicodeAsUTF8 --- libsolidity/parsing/Scanner.cpp | 4 ++-- libsolidity/parsing/Scanner.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'libsolidity') diff --git a/libsolidity/parsing/Scanner.cpp b/libsolidity/parsing/Scanner.cpp index 616e6a96..d730210a 100644 --- a/libsolidity/parsing/Scanner.cpp +++ b/libsolidity/parsing/Scanner.cpp @@ -196,7 +196,7 @@ bool Scanner::scanUnicode(unsigned & o_codepoint) } // This supports codepoints between 0000 and FFFF. -void Scanner::addUnicodeChar(unsigned codepoint) +void Scanner::addUnicodeAsUTF8(unsigned codepoint) { if (codepoint <= 0x7f) addLiteralChar(codepoint); @@ -647,7 +647,7 @@ bool Scanner::scanEscape() unsigned codepoint; if (!scanUnicode(codepoint)) return false; - addUnicodeChar(codepoint); + addUnicodeAsUTF8(codepoint); return true; } case 'x': diff --git a/libsolidity/parsing/Scanner.h b/libsolidity/parsing/Scanner.h index 3dde42b3..708adf8f 100644 --- a/libsolidity/parsing/Scanner.h +++ b/libsolidity/parsing/Scanner.h @@ -175,7 +175,7 @@ private: inline void addLiteralChar(char c) { m_nextToken.literal.push_back(c); } inline void addCommentLiteralChar(char c) { m_nextSkippedComment.literal.push_back(c); } inline void addLiteralCharAndAdvance() { addLiteralChar(m_char); advance(); } - void addUnicodeChar(unsigned codepoint); + void addUnicodeAsUTF8(unsigned codepoint); ///@} bool advance() { m_char = m_source.advanceAndGet(); return !m_source.isPastEndOfInput(); } -- cgit