From 7ce7d3e74c7e2de4514e4763bc88323dad214235 Mon Sep 17 00:00:00 2001 From: chriseth Date: Thu, 15 Dec 2016 14:03:31 +0100 Subject: Update develop version. --- CMakeLists.txt | 2 +- Changelog.md | 2 ++ docs/conf.py | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ee66eebd..ecd857ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ include(EthPolicy) eth_policy() # project name and version should be set after cmake_policy CMP0048 -set(PROJECT_VERSION "0.4.7") +set(PROJECT_VERSION "0.4.8") project(solidity VERSION ${PROJECT_VERSION}) # Let's find our dependencies diff --git a/Changelog.md b/Changelog.md index 096c72d4..9faf0f8e 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,3 +1,5 @@ +### 0.4.8 (unreleased) + ### 0.4.7 (2016-12-15) Features: diff --git a/docs/conf.py b/docs/conf.py index 2bc79fd9..ecabbb86 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -56,9 +56,9 @@ copyright = '2016, Ethereum' # built documents. # # The short X.Y version. -version = '0.4.7' +version = '0.4.8' # The full version, including alpha/beta/rc tags. -release = '0.4.7-develop' +release = '0.4.8-develop' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -- cgit From 31564d94b1f565f8ab9b6d7b7e673febace12643 Mon Sep 17 00:00:00 2001 From: Michael Coblenz Date: Mon, 19 Dec 2016 15:08:18 -0500 Subject: Fixed misleading typo "now possible" should instead read "not possible." --- docs/control-structures.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/control-structures.rst b/docs/control-structures.rst index 0802c085..f59be2fe 100644 --- a/docs/control-structures.rst +++ b/docs/control-structures.rst @@ -208,7 +208,7 @@ Creating Contracts via ``new`` A contract can create a new contract using the ``new`` keyword. The full code of the contract being created has to be known and, thus, recursive -creation-dependencies are now possible. +creation-dependencies are not possible. :: -- cgit From 4b3f147e65a16a19410c379698cba08318d118ef Mon Sep 17 00:00:00 2001 From: Michael Coblenz Date: Tue, 20 Dec 2016 13:48:01 -0500 Subject: Clarified support for recursive creation. --- docs/control-structures.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/control-structures.rst b/docs/control-structures.rst index f59be2fe..b57c12f8 100644 --- a/docs/control-structures.rst +++ b/docs/control-structures.rst @@ -207,8 +207,8 @@ Creating Contracts via ``new`` ============================== A contract can create a new contract using the ``new`` keyword. The full -code of the contract being created has to be known and, thus, recursive -creation-dependencies are not possible. +code of the contract being created has to be known; though recursive +creation-dependencies are possible, cyclic dependencies are not. :: -- cgit From 5148de3ed714d99c93676174131cbcfd29c25753 Mon Sep 17 00:00:00 2001 From: Federico Bond Date: Wed, 21 Dec 2016 19:27:31 -0300 Subject: Fix mapping syntax in docs --- docs/types.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/types.rst b/docs/types.rst index 069a9190..6b67e684 100644 --- a/docs/types.rst +++ b/docs/types.rst @@ -765,7 +765,7 @@ assigning it to a local variable, as in Mappings ======== -Mapping types are declared as ``mapping _KeyType => _ValueType``. +Mapping types are declared as ``mapping(_KeyType => _ValueType)``. Here ``_KeyType`` can be almost any type except for a mapping, a dynamically sized array, a contract, an enum and a struct. ``_ValueType`` can actually be any type, including mappings. -- cgit From c163a219de96c2ad3f5b8f5e9a473924df742f46 Mon Sep 17 00:00:00 2001 From: Michael Coblenz Date: Thu, 22 Dec 2016 10:35:58 -0500 Subject: Corrected explanation per discussion. --- docs/control-structures.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/control-structures.rst b/docs/control-structures.rst index b57c12f8..6c0b0f27 100644 --- a/docs/control-structures.rst +++ b/docs/control-structures.rst @@ -207,8 +207,8 @@ Creating Contracts via ``new`` ============================== A contract can create a new contract using the ``new`` keyword. The full -code of the contract being created has to be known; though recursive -creation-dependencies are possible, cyclic dependencies are not. +code of the contract being created has to be known in advance, so recursive +creation-dependencies are not possible. :: -- cgit From 2a7b01982c2b49399b9ee0dd89c7827af28c6280 Mon Sep 17 00:00:00 2001 From: Federico Bond Date: Sat, 24 Dec 2016 13:00:11 -0300 Subject: Fix StorageLocation position for ArrayTypeName rule in grammar --- docs/grammar.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/grammar.txt b/docs/grammar.txt index d15fbaf7..7bb45bba 100644 --- a/docs/grammar.txt +++ b/docs/grammar.txt @@ -38,7 +38,7 @@ TypeNameList = '(' ( TypeName (',' TypeName )* )? ')' VariableDeclaration = TypeName Identifier TypeName = ElementaryTypeName | Identifier StorageLocation? | Mapping | ArrayTypeName | FunctionTypeName Mapping = 'mapping' '(' ElementaryTypeName '=>' TypeName ')' -ArrayTypeName = TypeName StorageLocation? '[' Expression? ']' +ArrayTypeName = TypeName '[' Expression? ']' StorageLocation? FunctionTypeName = 'function' TypeNameList ( 'internal' | 'external' | 'constant' | 'payable' )* ( 'returns' TypeNameList )? StorageLocation = 'memory' | 'storage' -- cgit From 507525be5f3fec5860d226010beea6550373fd0b Mon Sep 17 00:00:00 2001 From: Federico Bond Date: Sat, 24 Dec 2016 13:10:51 -0300 Subject: Fix NumberLiteral rule for hex numbers --- docs/grammar.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/grammar.txt b/docs/grammar.txt index 7bb45bba..5fac96a1 100644 --- a/docs/grammar.txt +++ b/docs/grammar.txt @@ -90,13 +90,16 @@ MemberAccess = Expression '.' Identifier IndexAccess = Expression '[' Expression? ']' BooleanLiteral = 'true' | 'false' -NumberLiteral = '0x'? [0-9]+ (' ' NumberUnit)? +NumberLiteral = ( HexNumber | DecimalNumber ) (' ' NumberUnit)? NumberUnit = 'wei' | 'szabo' | 'finney' | 'ether' | 'seconds' | 'minutes' | 'hours' | 'days' | 'weeks' | 'years' HexLiteral = 'hex' ('"' ([0-9a-fA-F]{2})* '"' | '\'' ([0-9a-fA-F]{2})* '\'') StringLiteral = '"' ([^"\r\n\\] | '\\' .)* '"' Identifier = [a-zA-Z_] [a-zA-Z_0-9]* +HexNumber = '0x' [0-9a-fA-F]+ +DecimalNumber = [0-9]+ + ElementaryTypeName = 'address' | 'bool' | 'string' | 'var' | Int | Uint | Byte | Fixed | Ufixed -- cgit From 890a8818b5455eb067b5ee2ed6c9d32623c0c94c Mon Sep 17 00:00:00 2001 From: Federico Bond Date: Mon, 26 Dec 2016 22:57:00 -0300 Subject: Add unimplemented functions to grammar --- docs/grammar.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/grammar.txt b/docs/grammar.txt index 5fac96a1..c9f7f448 100644 --- a/docs/grammar.txt +++ b/docs/grammar.txt @@ -23,7 +23,7 @@ StructDefinition = 'struct' Identifier '{' ModifierDefinition = 'modifier' Identifier ParameterList? Block FunctionDefinition = 'function' Identifier? ParameterList ( FunctionCall | Identifier | 'constant' | 'payable' | 'external' | 'public' | 'internal' | 'private' )* - ( 'returns' ParameterList )? Block + ( 'returns' ParameterList )? ( ';' | Block ) EventDefinition = 'event' Identifier IndexedParameterList 'anonymous'? ';' EnumValue = Identifier -- cgit From 28550ac9ff52237ac2c9cf1919dd3adac017eca0 Mon Sep 17 00:00:00 2001 From: Federico Bond Date: Tue, 27 Dec 2016 23:08:57 -0300 Subject: Add UserDefinedTypeName to grammar.txt --- docs/grammar.txt | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/docs/grammar.txt b/docs/grammar.txt index c9f7f448..f18ef2f3 100644 --- a/docs/grammar.txt +++ b/docs/grammar.txt @@ -14,7 +14,7 @@ ContractDefinition = ( 'contract' | 'library' ) Identifier ContractPart = StateVariableDeclaration | UsingForDeclaration | StructDefinition | ModifierDefinition | FunctionDefinition | EventDefinition | EnumDefinition -InheritanceSpecifier = Identifier ( '(' Expression ( ',' Expression )* ')' )? +InheritanceSpecifier = UserDefinedTypeName ( '(' Expression ( ',' Expression )* ')' )? StateVariableDeclaration = TypeName ( 'public' | 'internal' | 'private' )? Identifier ('=' Expression)? ';' UsingForDeclaration = 'using' Identifier 'for' ('*' | TypeName) ';' @@ -36,7 +36,15 @@ TypeNameList = '(' ( TypeName (',' TypeName )* )? ')' // semantic restriction: mappings and structs (recursively) containing mappings // are not allowed in argument lists VariableDeclaration = TypeName Identifier -TypeName = ElementaryTypeName | Identifier StorageLocation? | Mapping | ArrayTypeName | FunctionTypeName + +TypeName = ElementaryTypeName + | UserDefinedTypeName StorageLocation? + | Mapping + | ArrayTypeName + | FunctionTypeName + +UserDefinedTypeName = Identifier ( '.' Identifier )* + Mapping = 'mapping' '(' ElementaryTypeName '=>' TypeName ')' ArrayTypeName = TypeName '[' Expression? ']' StorageLocation? FunctionTypeName = 'function' TypeNameList ( 'internal' | 'external' | 'constant' | 'payable' )* -- cgit From 1f80dbb39a69eb3be94111d190aa8511990fa951 Mon Sep 17 00:00:00 2001 From: Federico Bond Date: Wed, 28 Dec 2016 12:53:40 -0300 Subject: Add missing right-hand side expression for shifts --- docs/grammar.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/grammar.txt b/docs/grammar.txt index c9f7f448..5965d1f6 100644 --- a/docs/grammar.txt +++ b/docs/grammar.txt @@ -69,7 +69,7 @@ Expression = | Expression '**' Expression | Expression ('*' | '/' | '%') Expression | Expression ('+' | '-') Expression - | Expression ('<<' | '>>') + | Expression ('<<' | '>>') Expression | Expression '&' Expression | Expression '^' Expression | Expression '|' Expression -- cgit From 6955b8f7b9040c66edb0666b1e4c36f1ba997ed5 Mon Sep 17 00:00:00 2001 From: Federico Bond Date: Wed, 28 Dec 2016 17:32:10 -0300 Subject: Remove wrong syntax highlighting in docs --- docs/miscellaneous.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/miscellaneous.rst b/docs/miscellaneous.rst index 4a9dad87..378c3c96 100644 --- a/docs/miscellaneous.rst +++ b/docs/miscellaneous.rst @@ -570,3 +570,4 @@ Language Grammar ================ .. literalinclude:: grammar.txt + :language: none -- cgit From 2d60990238898620739281827692c3adc247a9cc Mon Sep 17 00:00:00 2001 From: Yoichi Hirai Date: Fri, 30 Dec 2016 13:52:08 +0100 Subject: libevmasm: OpPop() optimization keeps the source location fixes #1521 --- libevmasm/PeepholeOptimiser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libevmasm/PeepholeOptimiser.cpp b/libevmasm/PeepholeOptimiser.cpp index b96b0295..923ffa67 100644 --- a/libevmasm/PeepholeOptimiser.cpp +++ b/libevmasm/PeepholeOptimiser.cpp @@ -120,7 +120,7 @@ struct OpPop: SimplePeepholeOptimizerMethod if (instructionInfo(instr).ret == 1 && !instructionInfo(instr).sideEffects) { for (int j = 0; j < instructionInfo(instr).args; j++) - *_out = Instruction::POP; + *_out = {Instruction::POP, _op.location()}; return true; } } -- cgit From 04af994bf5cc5f286e68c7164e41a4bb7a27dfec Mon Sep 17 00:00:00 2001 From: Federico Bond Date: Tue, 27 Dec 2016 15:29:25 -0300 Subject: Add parsers and grammars to documentation --- docs/index.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/index.rst b/docs/index.rst index 9bee1515..cb79687b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -87,6 +87,15 @@ Solidity Tools * `evmdis `_ EVM Disassembler that performs static analysis on the bytecode to provide a higher level of abstraction than raw EVM operations. +Third-Party Solidity Parsers and Grammars +----------------------------------------- + +* `solidity-parser `_ + Solidity parser for JavaScript + +* `Solidity Grammar for ANTLR 4 `_ + Solidity grammar for the ANTLR 4 parser generator + Language Documentation ---------------------- -- cgit From 82b9797428ce3bed3e7c8864ae183bd0432bd644 Mon Sep 17 00:00:00 2001 From: Federico Bond Date: Tue, 27 Dec 2016 16:10:09 -0300 Subject: Add ElementaryTypeNameExpression to grammar.txt --- docs/grammar.txt | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/grammar.txt b/docs/grammar.txt index c9f7f448..1dbd620a 100644 --- a/docs/grammar.txt +++ b/docs/grammar.txt @@ -82,7 +82,12 @@ Expression = | Expression? (',' Expression) | PrimaryExpression -PrimaryExpression = Identifier | BooleanLiteral | NumberLiteral | HexLiteral | StringLiteral +PrimaryExpression = Identifier + | BooleanLiteral + | NumberLiteral + | HexLiteral + | StringLiteral + | ElementaryTypeNameExpression FunctionCall = ( PrimaryExpression | NewExpression | TypeName ) ( ( '.' Identifier ) | ( '[' Expression ']' ) )* '(' Expression? ( ',' Expression )* ')' NewExpression = 'new' Identifier @@ -100,6 +105,8 @@ Identifier = [a-zA-Z_] [a-zA-Z_0-9]* HexNumber = '0x' [0-9a-fA-F]+ DecimalNumber = [0-9]+ +ElementaryTypeNameExpression = ElementaryTypeName + ElementaryTypeName = 'address' | 'bool' | 'string' | 'var' | Int | Uint | Byte | Fixed | Ufixed -- cgit From c014448397e4810fe28461d8dc5d54ba1cd376a4 Mon Sep 17 00:00:00 2001 From: Yoichi Hirai Date: Mon, 2 Jan 2017 16:55:29 +0100 Subject: doc: rephrase the case when two remapping keys are applicable Fixes #1536 --- docs/layout-of-source-files.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/layout-of-source-files.rst b/docs/layout-of-source-files.rst index 17ac8c6f..dff48be3 100644 --- a/docs/layout-of-source-files.rst +++ b/docs/layout-of-source-files.rst @@ -96,8 +96,8 @@ Use in Actual Compilers When the compiler is invoked, it is not only possible to specify how to discover the first element of a path, but it is possible to specify path prefix remappings so that e.g. ``github.com/ethereum/dapp-bin/library`` is remapped to -``/usr/local/dapp-bin/library`` and the compiler will read the files from there. If -remapping keys are prefixes of each other, the longest is tried first. This +``/usr/local/dapp-bin/library`` and the compiler will read the files from there. +If multiple remappings can be applied, the one with the longest key is tried first. This allows for a "fallback-remapping" with e.g. ``""`` maps to ``"/usr/local/include/solidity"``. Furthermore, these remappings can depend on the context, which allows you to configure packages to -- cgit From 96ca706353f94b60c7e356f11aa7466a15eca33d Mon Sep 17 00:00:00 2001 From: Yoichi Hirai Date: Fri, 2 Dec 2016 12:03:39 +0100 Subject: test: add a test case for #1286 --- test/libsolidity/SolidityEndToEndTest.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/test/libsolidity/SolidityEndToEndTest.cpp b/test/libsolidity/SolidityEndToEndTest.cpp index 94d4fb7f..d8539524 100644 --- a/test/libsolidity/SolidityEndToEndTest.cpp +++ b/test/libsolidity/SolidityEndToEndTest.cpp @@ -2770,6 +2770,28 @@ BOOST_AUTO_TEST_CASE(event_no_arguments) BOOST_CHECK_EQUAL(m_logs[0].topics[0], dev::keccak256(string("Deposit()"))); } +BOOST_AUTO_TEST_CASE(event_access_through_base_name) +{ + char const* sourceCode = R"( + contract A { + event x(); + } + contract B is A { + function f() returns (uint) { + A.x(); + return 1; + } + } + )"; + compileAndRun(sourceCode); + callContractFunction("f()"); + BOOST_REQUIRE_EQUAL(m_logs.size(), 1); + BOOST_CHECK_EQUAL(m_logs[0].address, m_contractAddress); + BOOST_CHECK(m_logs[0].data.empty()); + BOOST_REQUIRE_EQUAL(m_logs[0].topics.size(), 1); + BOOST_CHECK_EQUAL(m_logs[0].topics[0], dev::keccak256(string("x()"))); +} + BOOST_AUTO_TEST_CASE(event_anonymous) { char const* sourceCode = R"( -- cgit From 6a312233fcbaf9b7ec7b7088719a3be724ea228b Mon Sep 17 00:00:00 2001 From: Yoichi Hirai Date: Wed, 26 Oct 2016 13:08:00 +0200 Subject: ast, codegen: enable accessing events through contract names. Fixes #1286 --- Changelog.md | 3 +++ libsolidity/ast/AST.cpp | 3 +++ libsolidity/codegen/ExpressionCompiler.cpp | 42 +++++++++++++++++++++++------- 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/Changelog.md b/Changelog.md index 9faf0f8e..a82e8744 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,5 +1,8 @@ ### 0.4.8 (unreleased) +BugFixes: + * Type checker, code generator: enable access to events of base contracts' names. + ### 0.4.7 (2016-12-15) Features: diff --git a/libsolidity/ast/AST.cpp b/libsolidity/ast/AST.cpp index 3cd1dfbe..78d8949c 100644 --- a/libsolidity/ast/AST.cpp +++ b/libsolidity/ast/AST.cpp @@ -217,6 +217,9 @@ vector const& ContractDefinition::inheritableMembers() const for (EnumDefinition const* e: definedEnums()) addInheritableMember(e); + + for (EventDefinition const* e: events()) + addInheritableMember(e); } return *m_inheritableMembers; } diff --git a/libsolidity/codegen/ExpressionCompiler.cpp b/libsolidity/codegen/ExpressionCompiler.cpp index a7fb8408..3922da88 100644 --- a/libsolidity/codegen/ExpressionCompiler.cpp +++ b/libsolidity/codegen/ExpressionCompiler.cpp @@ -908,19 +908,43 @@ bool ExpressionCompiler::visit(MemberAccess const& _memberAccess) solAssert(_memberAccess.annotation().type, "_memberAccess has no type"); if (auto funType = dynamic_cast(_memberAccess.annotation().type.get())) { - if (funType->location() != FunctionType::Location::Internal) - { - _memberAccess.expression().accept(*this); - m_context << funType->externalIdentifier(); - } - else + switch (funType->location()) { + case FunctionType::Location::Internal: // We do not visit the expression here on purpose, because in the case of an // internal library function call, this would push the library address forcing // us to link against it although we actually do not need it. - auto const* function = dynamic_cast(_memberAccess.annotation().referencedDeclaration); - solAssert(!!function, "Function not found in member access"); - utils().pushCombinedFunctionEntryLabel(*function); + if (auto const* function = dynamic_cast(_memberAccess.annotation().referencedDeclaration)) + utils().pushCombinedFunctionEntryLabel(*function); + else + solAssert(false, "Function not found in member access"); + break; + case FunctionType::Location::Event: + if (!dynamic_cast(_memberAccess.annotation().referencedDeclaration)) + solAssert(false, "event not found"); + // no-op, because the parent node will do the job + break; + case FunctionType::Location::External: + case FunctionType::Location::Creation: + case FunctionType::Location::DelegateCall: + case FunctionType::Location::CallCode: + case FunctionType::Location::Send: + case FunctionType::Location::Bare: + case FunctionType::Location::BareCallCode: + case FunctionType::Location::BareDelegateCall: + _memberAccess.expression().accept(*this); + m_context << funType->externalIdentifier(); + break; + case FunctionType::Location::Log0: + case FunctionType::Location::Log1: + case FunctionType::Location::Log2: + case FunctionType::Location::Log3: + case FunctionType::Location::Log4: + case FunctionType::Location::ECRecover: + case FunctionType::Location::SHA256: + case FunctionType::Location::RIPEMD160: + default: + solAssert(false, "unsupported member function"); } } else if (dynamic_cast(_memberAccess.annotation().type.get())) -- cgit From 941b5fd928899f3d1b3f2cd14c008deb6825d807 Mon Sep 17 00:00:00 2001 From: chriseth Date: Tue, 3 Jan 2017 13:58:54 +0100 Subject: Require stable nodejs for deployment. --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 2160d175..820125cf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -71,6 +71,7 @@ matrix: dist: trusty sudo: required compiler: gcc + node_js: stable services: - docker before_install: -- cgit From 0bd8c204f04f679b4371199d876bc6160d913782 Mon Sep 17 00:00:00 2001 From: chriseth Date: Sat, 5 Nov 2016 16:58:06 +0100 Subject: Assembly definition. --- docs/assembly.rst | 510 +++++++++++++++++++++++++++++++++++++++++++++ docs/solidity-in-depth.rst | 1 + 2 files changed, 511 insertions(+) create mode 100644 docs/assembly.rst diff --git a/docs/assembly.rst b/docs/assembly.rst new file mode 100644 index 00000000..71fe4027 --- /dev/null +++ b/docs/assembly.rst @@ -0,0 +1,510 @@ +################# +Solidity Assembly +################# + +.. index:: ! assembly, ! asm, ! evmasm + +Solidity defines an assembly language that can also be used without Solidity. +This assembly language can also be used as "inline assembly" inside Solidity +source code. We start with describing how to use inline assembly and how it +differs from standalone assembly and then specify assembly itself. + +TODO: Write about how scoping rules of inline assembly are a bit different +and the complications that arise when for example using internal functions +of libraries. Furhermore, write about the symbols defined by the compiler. + +Inline Assembly +=============== + +For more fine-grained control especially in order to enhance the language by writing libraries, +it is possible to interleave Solidity statements with inline assembly in a language close +to the one of the virtual machine. Due to the fact that the EVM is a stack machine, it is +often hard to address the correct stack slot and provide arguments to opcodes at the correct +point on the stack. Solidity's inline assembly tries to facilitate that and other issues +arising when writing manual assembly by the following features: + +* functional-style opcodes: ``mul(1, add(2, 3))`` instead of ``push1 3 push1 2 add push1 1 mul`` +* assembly-local variables: ``let x := add(2, 3) let y := mload(0x40) x := add(x, y)`` +* access to external variables: ``function f(uint x) { assembly { x := sub(x, 1) } }`` +* labels: ``let x := 10 repeat: x := sub(x, 1) jumpi(repeat, eq(x, 0))`` + +We now want to describe the inline assembly language in detail. + +.. warning:: + Inline assembly is still a relatively new feature and might change if it does not prove useful, + so please try to keep up to date. + +Example +------- + +The following example provides library code to access the code of another contract and +load it into a ``bytes`` variable. This is not possible at all with "plain Solidity" and the +idea is that assembly libraries will be used to enhance the language in such ways. + +.. code:: + + library GetCode { + function at(address _addr) returns (bytes o_code) { + assembly { + // retrieve the size of the code, this needs assembly + let size := extcodesize(_addr) + // allocate output byte array - this could also be done without assembly + // by using o_code = new bytes(size) + o_code := mload(0x40) + // new "memory end" including padding + mstore(0x40, add(o_code, and(add(add(size, 0x20), 0x1f), not(0x1f)))) + // store length in memory + mstore(o_code, size) + // actually retrieve the code, this needs assembly + extcodecopy(_addr, add(o_code, 0x20), 0, size) + } + } + } + +Inline assembly could also be beneficial in cases where the optimizer fails to produce +efficient code. Please be aware that assembly is much more difficult to write because +the compiler does not perform checks, so you should use it for complex things only if +you really know what you are doing. + +.. code:: + + library VectorSum { + // This function is less efficient because the optimizer currently fails to + // remove the bounds checks in array access. + function sumSolidity(uint[] _data) returns (uint o_sum) { + for (uint i = 0; i < _data.length; ++i) + o_sum += _data[i]; + } + + // We know that we only access the array in bounds, so we can avoid the check. + // 0x20 needs to be added to an array because the first slot contains the + // array length. + function sumAsm(uint[] _data) returns (uint o_sum) { + for (uint i = 0; i < _data.length; ++i) { + assembly { + o_sum := mload(add(add(_data, 0x20), i)) + } + } + } + } + +Standalone Assembly +=================== + +Grammar +------- + +The assembly lexer follows the one defined by Solidity itself. + +Whitespace is used to delimit tokens and it consists of the characters +Space, Tab and Linefeed. Comments as defined below, are interpreted in the +same way as Whitespace. +Furthermore, the following tokens exist: + +TODO: escapes inside strings, decimal literals, hex literals, hex string literals + +``OneLineComment := "//" [^\n]*` +``MultiLineComment := "/*" .*? "*/"`` + +``String := '"' [^"]* '"' | "'" [^']* "'"`` +``Identifier := [_$a-zA-Z][_$a-zA-Z0-9]*`` +``Opcodes := +"add" | "addmod" | "address" | "and" | "balance" | "blockhash" | "byte" | "call" | +"callcode" | "calldatacopy" | "calldataload" | "calldatasize" | "caller" | "callvalue" | +"codecopy" | "codesize" | "coinbase" | "create" | "delegatecall" | "difficulty" | +"div" | "dup1" | "dup2" | "dup3" | "dup4" | "dup5" | "dup6" | "dup7" | "dup8" | "dup9" | +"dup10" | "dup11" | "dup12" | "dup13" | "dup14" | "dup15" | "dup16" | "eq" | "exp" | +"extcodecopy" | "extcodesize" | "gas" | "gaslimit" | "gasprice" | "gt" | "iszero" | +"jump" | "jumpi" | "log0" | "log1" | "log2" | "log3" | "log4" | "lt" | "mload" | "mod" | +"msize" | "mstore" | "mstore8" | "mul" | "mulmod" | "not" | "number" | "or" | "origin" | +"pc" | "pop" | "return" | "sdiv" | "selfdestruct" | "sgt" | "sha3" | "signextend" | +"sload" | "slt" | "smod" | "sstore" | "stop" | "sub" | "swap1" | "swap2" | "swap3" | +"swap4" | "swap5" | "swap6" | "swap7" | "swap8" | "swap9" | "swap10" | "swap11" | +"swap12" | "swap13" | "swap14" | "swap15" | "swap16" | "timestamp" | "xor"`` + +TODO: Define functional instruction, label, assignment, functional assignment, +variable declaration, ... + + +Syntax +------ + +Inline assembly parses comments, literals and identifiers exactly as Solidity, so you can use the +usual ``//`` and ``/* */`` comments. Inline assembly is initiated by ``assembly { ... }`` and inside +these curly braces, the following can be used (see the later sections for more details) + + - literals, i.e. ``0x123``, ``42`` or ``"abc"`` (strings up to 32 characters) + - opcodes (in "instruction style"), e.g. ``mload sload dup1 sstore``, for a list see below + - opcode in functional style, e.g. ``add(1, mlod(0))`` + - labels, e.g. ``name:`` + - variable declarations, e.g. ``let x := 7`` or ``let x := add(y, 3)`` + - identifiers (externals, labels or assembly-local variables), e.g. ``jump(name)``, ``3 x add`` + - assignments (in "instruction style"), e.g. ``3 =: x`` + - assignments in functional style, e.g. ``x := add(y, 3)`` + - blocks where local variables are scoped inside, e.g. ``{ let x := 3 { let y := add(x, 1) } }`` + +Opcodes +------- + +This document does not want to be a full description of the Ethereum virtual machine, but the +following list can be used as a reference of its opcodes. + +If an opcode takes arguments (always from the top of the stack), they are given in parentheses. +Note that the order of arguments can be seed to be reversed in non-functional style (explained below). +Opcodes marked with ``-`` do not push an item onto the stack, those marked with ``*`` are +special and all others push exactly one item onte the stack. + +In the following, ``mem[a...b)`` signifies the bytes of memory starting at position ``a`` up to +(excluding) position ``b`` and ``storage[p]`` signifies the storage contents at position ``p``. + +The opcodes ``pushi`` and ``jumpdest`` cannot be used directly. + ++-------------------------+------+-----------------------------------------------------------------+ +| stop + `-` | stop execution, identical to return(0,0) | ++-------------------------+------+-----------------------------------------------------------------+ +| add(x, y) | | x + y | ++-------------------------+------+-----------------------------------------------------------------+ +| sub(x, y) | | x - y | ++-------------------------+------+-----------------------------------------------------------------+ +| mul(x, y) | | x * y | ++-------------------------+------+-----------------------------------------------------------------+ +| div(x, y) | | x / y | ++-------------------------+------+-----------------------------------------------------------------+ +| sdiv(x, y) | | x / y, for signed numbers in two's complement | ++-------------------------+------+-----------------------------------------------------------------+ +| mod(x, y) | | x % y | ++-------------------------+------+-----------------------------------------------------------------+ +| smod(x, y) | | x % y, for signed numbers in two's complement | ++-------------------------+------+-----------------------------------------------------------------+ +| exp(x, y) | | x to the power of y | ++-------------------------+------+-----------------------------------------------------------------+ +| not(x) | | ~x, every bit of x is negated | ++-------------------------+------+-----------------------------------------------------------------+ +| lt(x, y) | | 1 if x < y, 0 otherwise | ++-------------------------+------+-----------------------------------------------------------------+ +| gt(x, y) | | 1 if x > y, 0 otherwise | ++-------------------------+------+-----------------------------------------------------------------+ +| slt(x, y) | | 1 if x < y, 0 otherwise, for signed numbers in two's complement | ++-------------------------+------+-----------------------------------------------------------------+ +| sgt(x, y) | | 1 if x > y, 0 otherwise, for signed numbers in two's complement | ++-------------------------+------+-----------------------------------------------------------------+ +| eq(x, y) | | 1 if x == y, 0 otherwise | ++-------------------------+------+-----------------------------------------------------------------+ +| iszero(x) | | 1 if x == 0, 0 otherwise | ++-------------------------+------+-----------------------------------------------------------------+ +| and(x, y) | | bitwise and of x and y | ++-------------------------+------+-----------------------------------------------------------------+ +| or(x, y) | | bitwise or of x and y | ++-------------------------+------+-----------------------------------------------------------------+ +| xor(x, y) | | bitwise xor of x and y | ++-------------------------+------+-----------------------------------------------------------------+ +| byte(n, x) | | nth byte of x, where the most significant byte is the 0th byte | ++-------------------------+------+-----------------------------------------------------------------+ +| addmod(x, y, m) | | (x + y) % m with arbitrary precision arithmetics | ++-------------------------+------+-----------------------------------------------------------------+ +| mulmod(x, y, m) | | (x * y) % m with arbitrary precision arithmetics | ++-------------------------+------+-----------------------------------------------------------------+ +| signextend(i, x) | | sign extend from (i*8+7)th bit counting from least significant | ++-------------------------+------+-----------------------------------------------------------------+ +| sha3(p, n) | | keccak(mem[p...(p+n))) | ++-------------------------+------+-----------------------------------------------------------------+ +| jump(label) | `-` | jump to label / code position | ++-------------------------+------+-----------------------------------------------------------------+ +| jumpi(label, cond) | `-` | jump to label if cond is nonzero | ++-------------------------+------+-----------------------------------------------------------------+ +| pc | | current position in code | ++-------------------------+------+-----------------------------------------------------------------+ +| pop | `*` | remove topmost stack slot | ++-------------------------+------+-----------------------------------------------------------------+ +| dup1 ... dup16 | | copy ith stack slot to the top (counting from top) | ++-------------------------+------+-----------------------------------------------------------------+ +| swap1 ... swap16 | `*` | swap topmost and ith stack slot below it | ++-------------------------+------+-----------------------------------------------------------------+ +| mload(p) | | mem[p..(p+32)) | ++-------------------------+------+-----------------------------------------------------------------+ +| mstore(p, v) | `-` | mem[p..(p+32)) := v | ++-------------------------+------+-----------------------------------------------------------------+ +| mstore8(p, v) | `-` | mem[p] := v & 0xff - only modifies a single byte | ++-------------------------+------+-----------------------------------------------------------------+ +| sload(p) | | storage[p] | ++-------------------------+------+-----------------------------------------------------------------+ +| sstore(p, v) | `-` | storage[p] := v | ++-------------------------+------+-----------------------------------------------------------------+ +| msize | | size of memory, i.e. largest accessed memory index | ++-------------------------+------+-----------------------------------------------------------------+ +| gas | | gas still available to execution | ++-------------------------+------+-----------------------------------------------------------------+ +| address | | address of the current contract / execution context | ++-------------------------+------+-----------------------------------------------------------------+ +| balance(a) | | wei balance at address a | ++-------------------------+------+-----------------------------------------------------------------+ +| caller | | call sender (excluding delegatecall) | ++-------------------------+------+-----------------------------------------------------------------+ +| callvalue | | wei sent together with the current call | ++-------------------------+------+-----------------------------------------------------------------+ +| calldataload(p) | | call data starting from position p (32 bytes) | ++-------------------------+------+-----------------------------------------------------------------+ +| calldatasize | | size of call data in bytes | ++-------------------------+------+-----------------------------------------------------------------+ +| calldatacopy(t, f, s) | `-` | copy s bytes from calldata at position f to mem at position t | ++-------------------------+------+-----------------------------------------------------------------+ +| codesize | | size of the code of the current contract / execution context | ++-------------------------+------+-----------------------------------------------------------------+ +| codecopy(t, f, s) | `-` | copy s bytes from code at position f to mem at position t | ++-------------------------+------+-----------------------------------------------------------------+ +| extcodesize(a) | | size of the code at address a | ++-------------------------+------+-----------------------------------------------------------------+ +| extcodecopy(a, t, f, s) | `-` | like codecopy(t, f, s) but take code at address a | ++-------------------------+------+-----------------------------------------------------------------+ +| create(v, p, s) | | create new contract with code mem[p..(p+s)) and send v wei | +| | | and return the new address | ++-------------------------+------+-----------------------------------------------------------------+ +| call(g, a, v, in, | | call contract at address a with input mem[in..(in+insize)] | +| insize, out, outsize) | | providing g gas and v wei and output area | +| | | mem[out..(out+outsize)] returting 1 on error (out of gas) | ++-------------------------+------+-----------------------------------------------------------------+ +| callcode(g, a, v, in, | | identical to call but only use the code from a and stay | +| insize, out, outsize) | | in the context of the current contract otherwise | ++-------------------------+------+-----------------------------------------------------------------+ +| delegatecall(g, a, in, | | identical to callcode but also keep ``caller`` | +| insize, out, outsize) | | and ``callvalue`` | ++-------------------------+------+-----------------------------------------------------------------+ +| return(p, s) | `*` | end execution, return data mem[p..(p+s)) | ++-------------------------+------+-----------------------------------------------------------------+ +| selfdestruct(a) | `*` | end execution, destroy current contract and send funds to a | ++-------------------------+------+-----------------------------------------------------------------+ +| log0(p, s) | `-` | log without topics and data mem[p..(p+s)) | ++-------------------------+------+-----------------------------------------------------------------+ +| log1(p, s, t1) | `-` | log with topic t1 and data mem[p..(p+s)) | ++-------------------------+------+-----------------------------------------------------------------+ +| log2(p, s, t1, t2) | `-` | log with topics t1, t2 and data mem[p..(p+s)) | ++-------------------------+------+-----------------------------------------------------------------+ +| log3(p, s, t1, t2, t3) | `-` | log with topics t1, t2, t3 and data mem[p..(p+s)) | ++-------------------------+------+-----------------------------------------------------------------+ +| log4(p, s, t1, t2, t3, | `-` | log with topics t1, t2, t3, t4 and data mem[p..(p+s)) | +| t4) | | | ++-------------------------+------+-----------------------------------------------------------------+ +| origin | | transaction sender | ++-------------------------+------+-----------------------------------------------------------------+ +| gasprice | | gas price of the transaction | ++-------------------------+------+-----------------------------------------------------------------+ +| blockhash(b) | | hash of block nr b - only for last 256 blocks excluding current | ++-------------------------+------+-----------------------------------------------------------------+ +| coinbase | | current mining beneficiary | ++-------------------------+------+-----------------------------------------------------------------+ +| timestamp | | timestamp of the current block in seconds since the epoch | ++-------------------------+------+-----------------------------------------------------------------+ +| number | | current block number | ++-------------------------+------+-----------------------------------------------------------------+ +| difficulty | | difficulty of the current block | ++-------------------------+------+-----------------------------------------------------------------+ +| gaslimit | | block gas limit of the current block | ++-------------------------+------+-----------------------------------------------------------------+ + +Literals +-------- + +You can use integer constants by typing them in decimal or hexadecimal notation and an +appropriate ``PUSHi`` instruction will automatically be generated. The following creates code +to add 2 and 3 resulting in 5 and then computes the bitwise and with the string "abc". +Strings are stored left-aligned and cannot be longer than 32 bytes. + +.. code:: + + assembly { 2 3 add "abc" and } + +Functional Style +----------------- + +You can type opcode after opcode in the same way they will end up in bytecode. For example +adding ``3`` to the contents in memory at position ``0x80`` would be + +.. code:: + + 3 0x80 mload add 0x80 mstore + +As it is often hard to see what the actual arguments for certain opcodes are, +Solidity inline assembly also provides a "functional style" notation where the same code +would be written as follows + +.. code:: + + mstore(0x80, add(mload(0x80), 3)) + +Functional style and instructional style can be mixed, but any opcode inside a +functional style expression has to return exactly one stack slot (most of the opcodes do). + +Note that the order of arguments is reversed in functional-style as opposed to the instruction-style +way. If you use functional-style, the first argument will end up on the stack top. + + +Access to External Variables and Functions +------------------------------------------ + +Solidity variables and other identifiers can be accessed by simply using their name. +For storage and memory variables, this will push the address and not the value onto the +stack. Also note that non-struct and non-array storage variable addresses occupy two slots +on the stack: One for the address and one for the byte offset inside the storage slot. +In assignments (see below), we can even use local Solidity variables to assign to. + +Functions external to inline assembly can also be accessed: The assembly will +push their entry label (with virtual function resolution applied). The calling semantics +in solidity are: + + - the caller pushes return label, arg1, arg2, ..., argn + - the call returns with ret1, ret2, ..., retn + +This feature is still a bit cumbersome to use, because the stack offset essentially +changes during the call, and thus references to local variables will be wrong. +It is planned that the stack height changes can be specified in inline assembly. + +.. code:: + + contract C { + uint b; + function f(uint x) returns (uint r) { + assembly { + b pop // remove the offset, we know it is zero + sload + x + mul + =: r // assign to return variable r + } + } + } + +Labels +------ + +Another problem in EVM assembly is that ``jump`` and ``jumpi`` use absolute addresses +which can change easily. Solidity inline assembly provides labels to make the use of +jumps easier. The following code computes an element in the Fibonacci series. + +.. code:: + + { + let n := calldataload(4) + let a := 1 + let b := a + loop: + jumpi(loopend, eq(n, 0)) + a add swap1 + n := sub(n, 1) + jump(loop) + loopend: + mstore(0, a) + return(0, 0x20) + } + +Please note that automatically accessing stack variables can only work if the +assembler knows the current stack height. This fails to work if the jump source +and target have different stack heights. It is still fine to use such jumps, +you should just not access any stack variables (even assembly variables) in that case. + +Furthermore, the stack height analyser goes through the code opcode by opcode +(and not according to control flow), so in the following case, the assembler +will have a wrong impression about the stack height at label ``two``: + +.. code:: + + { + jump(two) + one: + // Here the stack height is 1 (because we pushed 7), + // but the assembler thinks it is 0 because it reads + // from top to bottom. + // Accessing stack variables here will lead to errors. + jump(three) + two: + 7 // push something onto the stack + jump(one) + three: + } + + +Declaring Assembly-Local Variables +---------------------------------- + +You can use the ``let`` keyword to declare variables that are only visible in +inline assembly and actually only in the current ``{...}``-block. What happens +is that the ``let`` instruction will create a new stack slot that is reserved +for the variable and automatically removed again when the end of the block +is reached. You need to provide an initial value for the variable which can +be just ``0``, but it can also be a complex functional-style expression. + +.. code:: + + contract C { + function f(uint x) returns (uint b) { + assembly { + let v := add(x, 1) + mstore(0x80, v) + { + let y := add(sload(v), 1) + b := y + } // y is "deallocated" here + b := add(b, v) + } // v is "deallocated" here + } + } + + +Assignments +----------- + +Assignments are possible to assembly-local variables and to function-local +variables. Take care that when you assign to variables that point to +memory or storage, you will only change the pointer and not the data. + +There are two kinds of assignments: Functional-style and instruction-style. +For functional-style assignments (``variable := value``), you need to provide a value in a +functional-style expression that results in exactly one stack value +and for instruction-style (``=: variable``), the value is just taken from the stack top. +For both ways, the colon points to the name of the variable. + +.. code:: + + assembly { + let v := 0 // functional-style assignment as part of variable declaration + let g := add(v, 2) + sload(10) + =: v // instruction style assignment, puts the result of sload(10) into v + } + + +Things to Avoid +--------------- + +Inline assembly might have a quite high-level look, but it actually is extremely +low-level. The only thing the assembler does for you is re-arranging +functional-style opcodes, managing jump labels, counting stack height for +variable access and removing stack slots for assembly-local variables when the end +of their block is reached. Especially for those two last cases, it is important +to know that the assembler only counts stack height from top to bottom, not +necessarily following control flow. Furthermore, operations like swap will only +swap the contents of the stack but not the location of variables. + +Conventions in Solidity +----------------------- + +In contrast to EVM assembly, Solidity knows types which are narrower than 256 bits, +e.g. ``uint24``. In order to make them more efficient, most arithmetic operations just +treat them as 256 bit numbers and the higher-order bits are only cleaned at the +point where it is necessary, i.e. just shortly before they are written to memory +or before comparisons are performed. This means that if you access such a variable +from within inline assembly, you might have to manually clean the higher order bits +first. + +Solidity manages memory in a very simple way: There is a "free memory pointer" +at position ``0x40`` in memory. If you want to allocate memory, just use the memory +from that point on and update the pointer accordingly. + +Elements in memory arrays in Solidity always occupy multiples of 32 bytes (yes, this is +even true for ``byte[]``, but not for ``bytes`` and ``string``). Multi-dimensional memory +arrays are pointers to memory arrays. The length of a dynamic array is stored at the +first slot of the array and then only the array elements follow. + +.. warning:: + Statically-sized memory arrays do not have a length field, but it will be added soon + to allow better convertibility between statically- and dynamically-sized arrays, so + please do not rely on that. diff --git a/docs/solidity-in-depth.rst b/docs/solidity-in-depth.rst index 40704698..b6217b47 100644 --- a/docs/solidity-in-depth.rst +++ b/docs/solidity-in-depth.rst @@ -16,4 +16,5 @@ If something is missing here, please contact us on units-and-global-variables.rst control-structures.rst contracts.rst + assembly.rst miscellaneous.rst -- cgit From a285ca44f228236ab92eb8dc9d878053829276b4 Mon Sep 17 00:00:00 2001 From: chriseth Date: Tue, 15 Nov 2016 14:01:11 +0100 Subject: Assembly output for Assembly object. --- libevmasm/Assembly.cpp | 69 ++++++++++---------------------------- libevmasm/AssemblyItem.cpp | 82 ++++++++++++++++++++++++++++++++++++++++++++++ libevmasm/AssemblyItem.h | 5 +++ 3 files changed, 105 insertions(+), 51 deletions(-) diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index a9ca24dc..f50a38a6 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -117,69 +117,36 @@ string Assembly::locationFromSources(StringMap const& _sourceCodes, SourceLocati ostream& Assembly::streamAsm(ostream& _out, string const& _prefix, StringMap const& _sourceCodes) const { - _out << _prefix << ".code:" << endl; - for (AssemblyItem const& i: m_items) + for (size_t i = 0; i < m_items.size(); ++i) { - _out << _prefix; - switch (i.type()) + AssemblyItem const& item = m_items[i]; + if (!item.location().isEmpty() && (i == 0 || m_items[i - 1].location() != item.location())) { - case Operation: - _out << " " << instructionInfo(i.instruction()).name << "\t" << i.getJumpTypeAsString(); - break; - case Push: - _out << " PUSH" << dec << max(1, dev::bytesRequired(i.data())) << " 0x" << hex << i.data(); - break; - case PushString: - _out << " PUSH \"" << m_strings.at((h256)i.data()) << "\""; - break; - case PushTag: - if (i.data() == 0) - _out << " PUSH [ErrorTag]"; - else - { - size_t subId = i.splitForeignPushTag().first; - if (subId == size_t(-1)) - _out << " PUSH [tag" << dec << i.splitForeignPushTag().second << "]"; - else - _out << " PUSH [tag" << dec << subId << ":" << i.splitForeignPushTag().second << "]"; - } - break; - case PushSub: - _out << " PUSH [$" << size_t(i.data()) << "]"; - break; - case PushSubSize: - _out << " PUSH #[$" << size_t(i.data()) << "]"; - break; - case PushProgramSize: - _out << " PUSHSIZE"; - break; - case PushLibraryAddress: - _out << " PUSHLIB \"" << m_libraries.at(h256(i.data())) << "\""; - break; - case Tag: - _out << "tag" << dec << i.data() << ": " << endl << _prefix << " JUMPDEST"; - break; - case PushData: - _out << " PUSH [" << hex << (unsigned)i.data() << "]"; - break; - default: - BOOST_THROW_EXCEPTION(InvalidOpcode()); + _out << _prefix << " /*"; + if (item.location().sourceName) + _out << " \"" + *item.location().sourceName + "\""; + if (!item.location().isEmpty()) + _out << ":" << to_string(item.location().start) + ":" + to_string(item.location().end); + _out << " */" << endl; } - _out << "\t\t" << locationFromSources(_sourceCodes, i.location()) << endl; + _out << _prefix << (item.type() == Tag ? "" : " ") << item.toAssemblyText() << endl; } if (!m_data.empty() || !m_subs.empty()) { - _out << _prefix << ".data:" << endl; + _out << _prefix << "stop" << endl; + Json::Value data; for (auto const& i: m_data) - if (u256(i.first) >= m_subs.size()) - _out << _prefix << " " << hex << (unsigned)(u256)i.first << ": " << dev::toHex(i.second) << endl; + assertThrow(u256(i.first) < m_subs.size(), AssemblyException, "Data not yet implemented."); + for (size_t i = 0; i < m_subs.size(); ++i) { - _out << _prefix << " " << hex << i << ": " << endl; - m_subs[i]->stream(_out, _prefix + " ", _sourceCodes); + _out << endl << _prefix << "sub_" << i << ": assembly {\n"; + m_subs[i]->streamAsm(_out, _prefix + " ", _sourceCodes); + _out << _prefix << "}" << endl; } } + return _out; } diff --git a/libevmasm/AssemblyItem.cpp b/libevmasm/AssemblyItem.cpp index 54e38de8..8b3f920a 100644 --- a/libevmasm/AssemblyItem.cpp +++ b/libevmasm/AssemblyItem.cpp @@ -20,6 +20,7 @@ */ #include "AssemblyItem.h" +#include #include using namespace std; @@ -97,6 +98,28 @@ int AssemblyItem::deposit() const return 0; } +bool AssemblyItem::canBeFunctional() const +{ + switch (m_type) + { + case Operation: + return !SemanticInformation::isDupInstruction(*this) && !SemanticInformation::isSwapInstruction(*this); + case Push: + case PushString: + case PushTag: + case PushData: + case PushSub: + case PushSubSize: + case PushProgramSize: + case PushLibraryAddress: + return true; + case Tag: + return false; + default:; + } + return 0; +} + string AssemblyItem::getJumpTypeAsString() const { switch (m_jumpType) @@ -111,6 +134,65 @@ string AssemblyItem::getJumpTypeAsString() const } } +string AssemblyItem::toAssemblyText() const +{ + string text; + switch (type()) + { + case Operation: + { + assertThrow(isValidInstruction(instruction()), AssemblyException, "Invalid instruction."); + string name = instructionInfo(instruction()).name; + transform(name.begin(), name.end(), name.begin(), [](unsigned char _c) { return tolower(_c); }); + text = name; + break; + } + case Push: + text = toHex(toCompactBigEndian(data(), 1), 1, HexPrefix::Add); + break; + case PushString: + assertThrow(false, AssemblyException, "Push string assembly output not implemented."); + break; + case PushTag: + assertThrow(data() < 0x10000, AssemblyException, "Sub-assembly tags not yet implemented."); + text = string("tag_") + to_string(size_t(data())); + break; + case Tag: + assertThrow(data() < 0x10000, AssemblyException, "Sub-assembly tags not yet implemented."); + text = string("tag_") + to_string(size_t(data())) + ":"; + break; + case PushData: + assertThrow(false, AssemblyException, "Push data not implemented."); + break; + case PushSub: + text = string("dataOffset(sub_") + to_string(size_t(data())) + ")"; + break; + case PushSubSize: + text = string("dataSize(sub_") + to_string(size_t(data())) + ")"; + break; + case PushProgramSize: + text = string("programSize"); + break; + case PushLibraryAddress: + text = string("linkerSymbol(\"") + toHex(data()) + string("\")"); + break; + case UndefinedItem: + assertThrow(false, AssemblyException, "Invalid assembly item."); + break; + default: + BOOST_THROW_EXCEPTION(InvalidOpcode()); + } + if (m_jumpType == JumpType::IntoFunction || m_jumpType == JumpType::OutOfFunction) + { + text += "\t//"; + if (m_jumpType == JumpType::IntoFunction) + text += " in"; + else + text += " out"; + } + return text; +} + ostream& dev::eth::operator<<(ostream& _out, AssemblyItem const& _item) { switch (_item.type()) diff --git a/libevmasm/AssemblyItem.h b/libevmasm/AssemblyItem.h index b5bd3ed8..cddfb17c 100644 --- a/libevmasm/AssemblyItem.h +++ b/libevmasm/AssemblyItem.h @@ -97,6 +97,9 @@ public: unsigned bytesRequired(unsigned _addressLength) const; int deposit() const; + /// @returns true if the assembly item can be used in a functional context. + bool canBeFunctional() const; + bool match(AssemblyItem const& _i) const { return _i.m_type == UndefinedItem || (m_type == _i.m_type && (m_type != Operation || m_data == _i.m_data)); } void setLocation(SourceLocation const& _location) { m_location = _location; } SourceLocation const& location() const { return m_location; } @@ -108,6 +111,8 @@ public: void setPushedValue(u256 const& _value) const { m_pushedValue = std::make_shared(_value); } u256 const* pushedValue() const { return m_pushedValue.get(); } + std::string toAssemblyText() const; + private: AssemblyItemType m_type; u256 m_data; -- cgit From 49ac2a1ee5005f641e875823a84ee73c9e93a7c5 Mon Sep 17 00:00:00 2001 From: chriseth Date: Fri, 9 Dec 2016 11:03:29 +0100 Subject: Rename programSize to bytecodeSize. --- libevmasm/AssemblyItem.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libevmasm/AssemblyItem.cpp b/libevmasm/AssemblyItem.cpp index 8b3f920a..b790e094 100644 --- a/libevmasm/AssemblyItem.cpp +++ b/libevmasm/AssemblyItem.cpp @@ -171,7 +171,7 @@ string AssemblyItem::toAssemblyText() const text = string("dataSize(sub_") + to_string(size_t(data())) + ")"; break; case PushProgramSize: - text = string("programSize"); + text = string("bytecodeSize"); break; case PushLibraryAddress: text = string("linkerSymbol(\"") + toHex(data()) + string("\")"); -- cgit From 9683cfea6dbbdf8f82e6cd58d52360f958b2322c Mon Sep 17 00:00:00 2001 From: chriseth Date: Tue, 3 Jan 2017 15:19:14 +0100 Subject: Update to new assembly specification. --- docs/assembly.rst | 412 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 381 insertions(+), 31 deletions(-) diff --git a/docs/assembly.rst b/docs/assembly.rst index 71fe4027..8ba3f56c 100644 --- a/docs/assembly.rst +++ b/docs/assembly.rst @@ -27,6 +27,12 @@ arising when writing manual assembly by the following features: * assembly-local variables: ``let x := add(2, 3) let y := mload(0x40) x := add(x, y)`` * access to external variables: ``function f(uint x) { assembly { x := sub(x, 1) } }`` * labels: ``let x := 10 repeat: x := sub(x, 1) jumpi(repeat, eq(x, 0))`` +* loops: ``for { let i := 0 } lt(i, x) { i := add(i, 1) } { y := mul(2, y) }`` +* switch statements: ``switch x case 0: { y := mul(x, 2) } default: { y := 0 }`` +* function calls: ``function f(x) -> (y) { switch x case 0: { y := 1 } default: y := mul(x, f(sub(x, 1))) }`` + +.. note:: + Of the above, loops, function calls and switch statements are not yet implemented. We now want to describe the inline assembly language in detail. @@ -91,40 +97,165 @@ you really know what you are doing. Standalone Assembly =================== -Grammar -------- +This assembly language tries to achieve several goals: + +1. Programs written in it should be readable, even if the code is generated by a compiler from Solidity. +2. The translation from assembly to bytecode should contain as few "surprises" as possible. +3. Control flow should be easy to detect to help in formal verification and optimization. + +In order to achieve the first and last goal, assembly provides high-level constructs +like ``for`` loops, ``switch`` statements and function calls. It should be possible +to write assembly programs that do not make use of explicit ``SWAP``, ``DUP``, +``JUMP`` and ``JUMPI`` statements, because the first two obfuscate the data flow +and the last two obfuscate control flow. Furthermore, functional statements of +the form ``mul(add(x, y), 7)`` are preferred over pure opcode statements like +``7 x y add mul`` because in the first form, it is much easier to see which +operand is used for which opcode. + +The second goal is achieved by introducing a desugaring phase that only removes +the higher level constructs in a very regular way and still allows inspecting +the generated low-level assembly code. The only non-local operation performed +by the assembler is name lookup of user-defined identifiers (functions, variables, ...), +which follow very simple and regular scoping rules and cleanup of local variables from the stack. + +Scoping: An identifier that is declared (label, variable, function, assembly) +is only visible in the block where it was declared (including nested blocks +inside the current block). It is not legal to access local variables across +function borders, even if they would be in scope. Shadowing is allowed, but +two identifiers with the same name cannot be declared in the same block. +Local variables cannot be accessed before they were declared, but labels, +functions and assemblies can. Assemblies are special blocks that are used +for e.g. returning runtime code or creating contracts. No identifier from an +outer assembly is visible in a sub-assembly. + +If control flow passes over the end of a block, pop instructions are inserted +that match the number of local variables declared in that block, unless the +``}`` is directly preceded by an opcode that does not have a continuing control +flow path. The stack height is reduced by the number of local variables +regardless of that. This mean that labels in the next block will have the +same height as before the block that just ended. + +If at the end of a block, the stack is not balanced, a warning is issued, +unless the last instruction in the block did not have a continuing control flow path. + +Why do we use higher-level constructs like ``switch``, ``for`` and functions: + +Using ``switch``, ``for`` and functions, it should be possible to write +complex code without using ``jump`` or ``jumpi`` manually. This makes it much +easier to analyze the control flow, which allows for improved formal +verification and optimization. + +Furthermore, if manual jumps are allowed, computing the stack height is rather complicated. +The position of all local variables on the stack needs to be known, otherwise +neither references to local variables nor removing local variables automatically +from the stack at the end of a block will work properly. Because of that, +every label that is preceded by an instruction that ends or diverts control flow +should be annotated with the current stack layout. This annotation is performed +automatically during the desugaring phase. + +Example: + +We will follow an example compilation from Solidity to desugared assembly. +We consider the runtime bytecode of the following Solidity program:: -The assembly lexer follows the one defined by Solidity itself. + contract C { + function f(uint x) returns (uint y) { + y = 1 + for (uint i = 0; i < x; i++) + y = 2 * y; + } + } -Whitespace is used to delimit tokens and it consists of the characters -Space, Tab and Linefeed. Comments as defined below, are interpreted in the -same way as Whitespace. -Furthermore, the following tokens exist: - -TODO: escapes inside strings, decimal literals, hex literals, hex string literals - -``OneLineComment := "//" [^\n]*` -``MultiLineComment := "/*" .*? "*/"`` - -``String := '"' [^"]* '"' | "'" [^']* "'"`` -``Identifier := [_$a-zA-Z][_$a-zA-Z0-9]*`` -``Opcodes := -"add" | "addmod" | "address" | "and" | "balance" | "blockhash" | "byte" | "call" | -"callcode" | "calldatacopy" | "calldataload" | "calldatasize" | "caller" | "callvalue" | -"codecopy" | "codesize" | "coinbase" | "create" | "delegatecall" | "difficulty" | -"div" | "dup1" | "dup2" | "dup3" | "dup4" | "dup5" | "dup6" | "dup7" | "dup8" | "dup9" | -"dup10" | "dup11" | "dup12" | "dup13" | "dup14" | "dup15" | "dup16" | "eq" | "exp" | -"extcodecopy" | "extcodesize" | "gas" | "gaslimit" | "gasprice" | "gt" | "iszero" | -"jump" | "jumpi" | "log0" | "log1" | "log2" | "log3" | "log4" | "lt" | "mload" | "mod" | -"msize" | "mstore" | "mstore8" | "mul" | "mulmod" | "not" | "number" | "or" | "origin" | -"pc" | "pop" | "return" | "sdiv" | "selfdestruct" | "sgt" | "sha3" | "signextend" | -"sload" | "slt" | "smod" | "sstore" | "stop" | "sub" | "swap1" | "swap2" | "swap3" | -"swap4" | "swap5" | "swap6" | "swap7" | "swap8" | "swap9" | "swap10" | "swap11" | -"swap12" | "swap13" | "swap14" | "swap15" | "swap16" | "timestamp" | "xor"`` - -TODO: Define functional instruction, label, assignment, functional assignment, -variable declaration, ... +The following assembly will be generated:: + + { + mstore(0x40, 0x60) // store the "free memory pointer" + // function dispatcher + switch div(calldataload(0), exp(2, 226)) + case 0xb3de648b: { + let (r,) = f(calldataload(4)) + let ret := $allocate(0x20) + mstore(ret, r) + return(ret, 0x20) + } + default: { jump(invalidJumpLabel) } + // memory allocator + function $allocate(size) -> (pos) { + pos := mload(0x40) + mstore(0x40, add(pos, size)) + } + // the contract function + function f(x) -> (y) { + y := 1 + for { let i := 0 } lt(i, x) { i := add(i, 1) } { + y := mul(2, y) + } + } + } +After the desugaring phase it looks as follows:: + + { + mstore(0x40, 0x60) + { + let $0 := div(calldataload(0), exp(2, 226)) + jumpi($case1, eq($0, 0xb3de648b)) + jump($caseDefault) + $case1: + { + // the function call - we put return label and arguments on the stack + $ret1 calldataload(4) jump($fun_f) + $ret1 [r]: // a label with a [...]-annotation resets the stack height + // to "current block + number of local variables". It also + // introduces a variable, r: + // r is at top of stack, $0 is below (from enclosing block) + $ret2 0x20 jump($fun_allocate) + $ret2 [ret]: // stack here: $0, r, ret (top) + mstore(ret, r) + return(ret, 0x20) + // although it is useless, the jump is automatically inserted, + // since the desugaring process does not analyze control-flow + jump($endswitch) + } + $caseDefault: + { + jump(invalidJumpLabel) + jump($endswitch) + } + $endswitch: + } + jump($afterFunction) + $fun_allocate: + { + $start[$retpos, size]: + let pos := 0 + { + pos := mload(0x40) + mstore(0x40, add(pos, size)) + } + swap1 pop swap1 jump + } + $fun_f: + { + start [$retpos, x]: + let y := 0 + { + let i := 0 + $for_begin: + jumpi($for_end, iszero(lt(i, x))) + { + y := mul(2, y) + } + $for_continue: + { i := add(i, 1) } + jump($for_begin) + $for_end: + } // Here, a pop instruction is inserted for i + swap1 pop swap1 jump + } + $afterFunction: + stop + } Syntax ------ @@ -159,6 +290,8 @@ In the following, ``mem[a...b)`` signifies the bytes of memory starting at posit The opcodes ``pushi`` and ``jumpdest`` cannot be used directly. +In the grammar, opcodes are represented as pre-defined identifiers. + +-------------------------+------+-----------------------------------------------------------------+ | stop + `-` | stop execution, identical to return(0,0) | +-------------------------+------+-----------------------------------------------------------------+ @@ -508,3 +641,220 @@ first slot of the array and then only the array elements follow. Statically-sized memory arrays do not have a length field, but it will be added soon to allow better convertibility between statically- and dynamically-sized arrays, so please do not rely on that. + + +Specification +============= + +Assembly happens in four stages: + +1. Parsing +2. Desugaring (removes switch, for and functions) +3. Opcode stream generation +4. Bytecode generation + + +Parsing / Grammar +----------------- + +The tasks of the parser are the following: + +- Turn the byte stream into a token stream, discarding C++-style comments + (a special comment exists for source references, but we will not explain it here). +- Turn the token stream into an AST according to the grammar below +- Register identifiers with the block they are defined in (annotation to the + AST node) and note from which point on, variables can be accessed. + +The assembly lexer follows the one defined by Solidity itself. + +Whitespace is used to delimit tokens and it consists of the characters +Space, Tab and Linefeed. Comments are regular JavaScript/C++ comments and +are interpreted in the same way as Whitespace. + +Grammar:: + + AssemblyBlock = '{' AssemblyItem* '}' + AssemblyItem = + Identifier | + AssemblyBlock | + FunctionalAssemblyExpression | + AssemblyLocalDefinition | + FunctionalAssemblyAssignment | + AssemblyAssignment | + LabelDefinition | + AssemblySwitch | + AssemblyFunctionDefinition | + AssemblyFor | + 'break' | 'continue' | + SubAssembly | 'dataSize' '(' Identifier ')' | + LinkerSymbol | + 'errorLabel' | 'bytecodeSize' | + NumberLiteral | StringLiteral | HexLiteral + Identifier = [a-zA-Z_$] [a-zA-Z_0-9]* + FunctionalAssemblyExpression = Identifier '(' ( AssemblyItem ( ',' AssemblyItem )* )? ')' + AssemblyLocalDefinition = 'let' IdentifierOrList ':=' FunctionalAssemblyExpression + FunctionalAssemblyAssignment = IdentifierOrList ':=' FunctionalAssemblyExpression + IdentifierOrList = Identifier | '(' IdentifierList ')' + IdentifierList = Identifier ( ',' Identifier)* + AssemblyAssignment = '=:' Identifier + LabelDefinition = Identifier ( '[' ( IdentifierList | NumberLiteral ) ']' )? ':' + AssemblySwitch = 'switch' FunctionalAssemblyExpression AssemblyCase* + ( 'default' ':' AssemblyBlock )? + AssemblyCase = 'case' FunctionalAssemblyExpression ':' AssemblyBlock + AssemblyFunctionDefinition = 'function' Identifier '(' IdentifierList? ')' '->' + ( '(' IdentifierList ')' AssemblyBlock + AssemblyFor = 'for' ( AssemblyBlock | FunctionalAssemblyExpression) + FunctionalAssemblyExpression ( AssemblyBlock | FunctionalAssemblyExpression) AssemblyBlock + SubAssembly = 'assembly' Identifier AssemblyBlock + LinkerSymbol = 'linkerSymbol' '(' StringLiteral ')' + NumberLiteral = HexNumber | DecimalNumber + HexLiteral = 'hex' ('"' ([0-9a-fA-F]{2})* '"' | '\'' ([0-9a-fA-F]{2})* '\'') + StringLiteral = '"' ([^"\r\n\\] | '\\' .)* '"' + HexNumber = '0x' [0-9a-fA-F]+ + DecimalNumber = [0-9]+ + + +Desugaring +---------- + +An AST transformation removes for, switch and function constructs. The result +is still parseable by the same parser, but it will not use certain constructs. +If jumpdests are added that are only jumped to and not continued at, information +about the stack content is added, unless no local variables of outer scopes are +accessed or the stack height is the same as for the previous instruction. + +Pseudocode:: + + desugar item: AST -> AST = + match item { + AssemblyFunctionDefinition('function' name '(' arg1, ..., argn ')' '->' ( '(' ret1, ..., retm ')' body) -> + : + { + $_start [$retPC, $argn, ..., arg1]: + let ret1 := 0 ... let retm := 0 + { desugar(body) } + swap and pop items so that only ret1, ... retn, $retPC are left on the stack + jump + } + AssemblyFor('for' { init } condition post body) -> + { + init // cannot be its own block because we want variable scope to extend into the body + // find I such that there are no labels $forI_* + $forI_begin: + jumpi($forI_end, iszero(condition)) + { body } + $forI_continue: + { post } + jump($forI_begin) + $forI_end: + } + 'break' -> + { + // find nearest enclosing scope with label $forI_end + pop all local variables that are defined at the current point + but not at $forI_end + jump($forI_end) + } + 'continue' -> + { + // find nearest enclosing scope with label $forI_continue + pop all local variables that are defined at the current point + but not at $forI_continue + jump($forI_continue) + } + AssemblySwitch(switch condition cases ( default: defaultBlock )? ) -> + { + // find I such that there is no $switchI* label or variable + let $switchI_value := condition + for each of cases match { + case val: -> jumpi($switchI_caseJ, eq($switchI_value, val)) + } + if default block present: -> + { defaultBlock jump($switchI_end) } + for each of cases match { + case val: { body } -> $switchI_caseJ: { body jump($switchI_end) } + } + $switchI_end: + } + FunctionalAssemblyExpression( identifier(arg1, arg2, ..., argn) ) -> + { + if identifier is function with n args and m ret values -> + { + // find I such that $funcallI_* does not exist + $funcallI_return argn ... arg2 arg1 jump() + if the current context is `let (id1, ..., idm) := f(...)` -> + $funcallI_return [id1, ..., idm]: + else -> + $funcallI_return[m - n - 1]: + turn the functional expression that leads to the function call + into a statement stream + } + else -> desugar(children of node) + } + default node -> + desugar(children of node) + } + +Opcode Stream Generation +------------------------ + +During opcode stream generation, we keep track of the current stack height, +so that accessing stack variables by name is possible. + +Pseudocode:: + + codegen item: AST -> opcode_stream = + match item { + AssemblyBlock({ items }) -> + join(codegen(item) for item in items) + if last generated opcode has continuing control flow: + POP for all local variables registered at the block (including variables + introduced by labels) + warn if the stack height at this point is not the same as at the start of the block + Identifier(id) -> + lookup id in the syntactic stack of blocks + match type of id + Local Variable -> + DUPi where i = 1 + stack_height - stack_height_of_identifier(id) + Label -> + // reference to be resolved during bytecode generation + PUSH + SubAssembly -> + PUSH + FunctionalAssemblyExpression(id ( arguments ) ) -> + join(codegen(arg) for arg in arguments.reversed()) + id (which has to be an opcode, might be a function name later) + AssemblyLocalDefinition(let (id1, ..., idn) := expr) -> + register identifiers id1, ..., idn as locals in current block at current stack height + codegen(expr) - assert that expr returns n items to the stack + FunctionalAssemblyAssignment((id1, ..., idn) := expr) -> + lookup id1, ..., idn in the syntactic stack of blocks, assert that they are variables + codegen(expr) + for j = n, ..., i: + SWAPi where i = 1 + stack_height - stack_height_of_identifier(idj) + POP + AssemblyAssignment(=: id) -> + look up id in the syntactic stack of blocks, assert that it is a variable + SWAPi where i = 1 + stack_height - stack_height_of_identifier(id) + POP + LabelDefinition(name [id1, ..., idn] :) -> + JUMPDEST + // register new variables id1, ..., idn and set the stack height to + // stack_height_at_block_start + number_of_local_variables + LabelDefinition(name [number] :) -> + JUMPDEST + // adjust stack height by +number (can be negative) + NumberLiteral(num) -> + PUSH + HexLiteral(lit) -> + PUSH32 + StringLiteral(lit) -> + PUSH32 + SubAssembly(assembly block) -> + append codegen(block) at the end of the code + dataSize() -> + assert that is a subassembly -> + PUSH32> + linkerSymbol() -> + PUSH32 and append position to linker table + } -- cgit From 9128e73b03856c72e90c581cb36e931e971af29e Mon Sep 17 00:00:00 2001 From: Yoichi Hirai Date: Mon, 2 Jan 2017 16:13:57 +0100 Subject: docs: clarify what are considered as absolute paths --- docs/layout-of-source-files.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/layout-of-source-files.rst b/docs/layout-of-source-files.rst index dff48be3..1e27b7c0 100644 --- a/docs/layout-of-source-files.rst +++ b/docs/layout-of-source-files.rst @@ -79,8 +79,9 @@ Paths ----- In the above, ``filename`` is always treated as a path with ``/`` as directory separator, -``.`` as the current and ``..`` as the parent directory. Path names that do not start -with ``.`` are treated as absolute paths. +``.`` as the current and ``..`` as the parent directory. When ``.`` or ``..`` is followed by a character except ``/``, +it is not considered as the current or the parent directory. +All path names are treated as absolute paths unless they start with the current ``.`` or the parent directory ``..``. To import a file ``x`` from the same directory as the current file, use ``import "./x" as x;``. If you use ``import "x" as x;`` instead, a different file could be referenced -- cgit From 41fe412389645fd423c8fb2422db99892f829030 Mon Sep 17 00:00:00 2001 From: Yoichi Hirai Date: Mon, 2 Jan 2017 16:47:45 +0100 Subject: interface: change absolutePath() so that ".dir" is considered as an absolute path fixes #1534 --- libsolidity/interface/CompilerStack.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libsolidity/interface/CompilerStack.cpp b/libsolidity/interface/CompilerStack.cpp index 4095844f..ee55f41a 100644 --- a/libsolidity/interface/CompilerStack.cpp +++ b/libsolidity/interface/CompilerStack.cpp @@ -593,11 +593,11 @@ bool CompilerStack::checkLibraryNameClashes() string CompilerStack::absolutePath(string const& _path, string const& _reference) const { - // Anything that does not start with `.` is an absolute path. - if (_path.empty() || _path.front() != '.') - return _path; using path = boost::filesystem::path; path p(_path); + // Anything that does not start with `.` is an absolute path. + if (p.begin() == p.end() || (*p.begin() != "." && *p.begin() != "..")) + return _path; path result(_reference); result.remove_filename(); for (path::iterator it = p.begin(); it != p.end(); ++it) -- cgit From 07f34e0023c395b43c5d63ce5a4d2cf5160025fa Mon Sep 17 00:00:00 2001 From: Yoichi Hirai Date: Tue, 3 Jan 2017 18:16:33 +0100 Subject: test: add a test importing a file name starting with a period --- test/libsolidity/Imports.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/libsolidity/Imports.cpp b/test/libsolidity/Imports.cpp index bc6adc26..e3f0b281 100644 --- a/test/libsolidity/Imports.cpp +++ b/test/libsolidity/Imports.cpp @@ -164,6 +164,14 @@ BOOST_AUTO_TEST_CASE(context_dependent_remappings) BOOST_CHECK(c.compile()); } +BOOST_AUTO_TEST_CASE(filename_with_period) +{ + CompilerStack c; + c.addSource("a/a.sol", "import \".b.sol\"; contract A is B {} pragma solidity >=0.0;"); + c.addSource("a/.b.sol", "contract B {} pragma solidity >=0.0;"); + BOOST_CHECK(!c.compile()); +} + BOOST_AUTO_TEST_SUITE_END() } -- cgit From 4bc934abce48a6c30cdaa9f4cbf7dae15f292831 Mon Sep 17 00:00:00 2001 From: chriseth Date: Wed, 4 Jan 2017 12:34:44 +0100 Subject: More information about switch, loops and functions. --- docs/assembly.rst | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/docs/assembly.rst b/docs/assembly.rst index 8ba3f56c..255ad3e6 100644 --- a/docs/assembly.rst +++ b/docs/assembly.rst @@ -604,12 +604,89 @@ For both ways, the colon points to the name of the variable. =: v // instruction style assignment, puts the result of sload(10) into v } +Switch +------ + +You can use a switch statement as a very basic version of "if/else". +It takes the value of an expression and compares it to several constants. +The branch corresponding to the matching constant is taken. Contrary to the +error-prone behaviour of some programming languages, control flow does +not continue from one case to the next. There is a fallback or default +case called ``default``. + +.. code:: + + assembly { + let x := 0 + switch calldataload(4) + case 0: { x := calldataload(0x24) } + default: { x := calldataload(0x44) } + sstore(0, div(x, 2)) + } + +The list of cases does not require curly braces, but the body of a +case does require them. + +Loops +----- + +Assembly supports a simple for-style loop. For-style loops have +a header containing an initializing part, a condition and a post-iteration +part. The condition has to be a functional-style expression, while +the other two can also be blocks. If the initializing part is a block that +declares any variables, the scope of these variables is extended into the +body (including the condition and the post-iteration part). + +The following example computes the sum of an area in memory. + +.. code:: + + assembly { + let x := 0 + for { let i := 0 } lt(i, 0x100) { i := add(i, 0x20) } { + x := add(x, mload(i)) + } + } + +Functions +--------- + +Assembly allows the definition of low-level functions. These take their +arguments (and a return PC) from the stack and also put the results onto the +stack. Calling a function looks the same way as executing a functional-style +opcode. + +Functions can be defined anywhere and are visible in the block they are +declared in. Inside a function, you cannot access local variables +defined outside of that function. There is no explicit ``return`` +statement. + +If you call a function that returns multiple values, you have to assign +them to a tuple using ``(a, b) := f(x)`` or ``let (a, b) := f(x)``. + +The following example implements the power function by square-and-multiply. + +.. code:: + + assembly { + function power(base, exponent) -> (result) { + switch exponent + 0: { result := 1 } + 1: { result := base } + default: { + result := power(mul(base, base), div(exponent, 2)) + switch mod(exponent, 2) + 1: { result := mul(base, result) } + } + } + } Things to Avoid --------------- Inline assembly might have a quite high-level look, but it actually is extremely -low-level. The only thing the assembler does for you is re-arranging +low-level. Function calls, loops and switches are converted by simple +rewriting rules and after that, the only thing the assembler does for you is re-arranging functional-style opcodes, managing jump labels, counting stack height for variable access and removing stack slots for assembly-local variables when the end of their block is reached. Especially for those two last cases, it is important -- cgit From e92af89ec8563334ddb9aa8f208a249c149b3092 Mon Sep 17 00:00:00 2001 From: chriseth Date: Wed, 4 Jan 2017 12:58:35 +0100 Subject: Typos and clarifications. --- docs/assembly.rst | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/docs/assembly.rst b/docs/assembly.rst index 255ad3e6..2d868e4d 100644 --- a/docs/assembly.rst +++ b/docs/assembly.rst @@ -29,7 +29,7 @@ arising when writing manual assembly by the following features: * labels: ``let x := 10 repeat: x := sub(x, 1) jumpi(repeat, eq(x, 0))`` * loops: ``for { let i := 0 } lt(i, x) { i := add(i, 1) } { y := mul(2, y) }`` * switch statements: ``switch x case 0: { y := mul(x, 2) } default: { y := 0 }`` -* function calls: ``function f(x) -> (y) { switch x case 0: { y := 1 } default: y := mul(x, f(sub(x, 1))) }`` +* function calls: ``function f(x) -> (y) { switch x case 0: { y := 1 } default: { y := mul(x, f(sub(x, 1))) } }`` .. note:: Of the above, loops, function calls and switch statements are not yet implemented. @@ -69,7 +69,7 @@ idea is that assembly libraries will be used to enhance the language in such way Inline assembly could also be beneficial in cases where the optimizer fails to produce efficient code. Please be aware that assembly is much more difficult to write because -the compiler does not perform checks, so you should use it for complex things only if +the compiler does not perform checks, so you should use it only if you really know what you are doing. .. code:: @@ -88,7 +88,7 @@ you really know what you are doing. function sumAsm(uint[] _data) returns (uint o_sum) { for (uint i = 0; i < _data.length; ++i) { assembly { - o_sum := mload(add(add(_data, 0x20), i)) + o_sum := mload(add(add(_data, 0x20), mul(i, 0x20))) } } } @@ -109,7 +109,7 @@ to write assembly programs that do not make use of explicit ``SWAP``, ``DUP``, ``JUMP`` and ``JUMPI`` statements, because the first two obfuscate the data flow and the last two obfuscate control flow. Furthermore, functional statements of the form ``mul(add(x, y), 7)`` are preferred over pure opcode statements like -``7 x y add mul`` because in the first form, it is much easier to see which +``7 y x add mul`` because in the first form, it is much easier to see which operand is used for which opcode. The second goal is achieved by introducing a desugaring phase that only removes @@ -131,11 +131,14 @@ outer assembly is visible in a sub-assembly. If control flow passes over the end of a block, pop instructions are inserted that match the number of local variables declared in that block, unless the ``}`` is directly preceded by an opcode that does not have a continuing control -flow path. The stack height is reduced by the number of local variables -regardless of that. This mean that labels in the next block will have the -same height as before the block that just ended. - -If at the end of a block, the stack is not balanced, a warning is issued, +flow path. Whenever a local variable is referenced, the code generator needs +to know its current relative position in the stack and thus it needs to +keep track of the current so-called stack height. +At the end of a block, this implicit stack height is always reduced by the number +of local variables whether ther is a continuing control flow or not. + +This means that the stack height before and after the block should be the same. +If this is not the case, a warning is issued, unless the last instruction in the block did not have a continuing control flow path. Why do we use higher-level constructs like ``switch``, ``for`` and functions: @@ -160,7 +163,7 @@ We consider the runtime bytecode of the following Solidity program:: contract C { function f(uint x) returns (uint y) { - y = 1 + y = 1; for (uint i = 0; i < x; i++) y = 2 * y; } @@ -228,6 +231,7 @@ After the desugaring phase it looks as follows:: $fun_allocate: { $start[$retpos, size]: + // output variables live in the same scope as the arguments. let pos := 0 { pos := mload(0x40) @@ -261,7 +265,7 @@ Syntax ------ Inline assembly parses comments, literals and identifiers exactly as Solidity, so you can use the -usual ``//`` and ``/* */`` comments. Inline assembly is initiated by ``assembly { ... }`` and inside +usual ``//`` and ``/* */`` comments. Inline assembly is marked by ``assembly { ... }`` and inside these curly braces, the following can be used (see the later sections for more details) - literals, i.e. ``0x123``, ``42`` or ``"abc"`` (strings up to 32 characters) @@ -778,8 +782,8 @@ Grammar:: AssemblySwitch = 'switch' FunctionalAssemblyExpression AssemblyCase* ( 'default' ':' AssemblyBlock )? AssemblyCase = 'case' FunctionalAssemblyExpression ':' AssemblyBlock - AssemblyFunctionDefinition = 'function' Identifier '(' IdentifierList? ')' '->' - ( '(' IdentifierList ')' AssemblyBlock + AssemblyFunctionDefinition = 'function' Identifier '(' IdentifierList? ')' + ( '->' '(' IdentifierList ')' )? AssemblyBlock AssemblyFor = 'for' ( AssemblyBlock | FunctionalAssemblyExpression) FunctionalAssemblyExpression ( AssemblyBlock | FunctionalAssemblyExpression) AssemblyBlock SubAssembly = 'assembly' Identifier AssemblyBlock -- cgit From e45510d02ff0c543038802359d4920b3e500d517 Mon Sep 17 00:00:00 2001 From: chriseth Date: Wed, 4 Jan 2017 13:21:41 +0100 Subject: Create source tarballs for releases. --- .travis.yml | 51 ++++++++++++---------------------------- scripts/create_source_tarball.sh | 35 +++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 36 deletions(-) create mode 100755 scripts/create_source_tarball.sh diff --git a/.travis.yml b/.travis.yml index 820125cf..a50674c1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,11 +26,13 @@ language: cpp branches: - # We need to whitelist the branches which we want to have "push" automation. + # We need to whitelist the branches which we want to have "push" automation, + # this includes tags (which are treated as branches by travis). # Pull request automation is not constrained to this set of branches. only: - develop - release + - /^v[0-9]/ matrix: include: # Ubuntu 14.04 LTS "Trusty Tahr" @@ -142,7 +144,6 @@ cache: install: - test $TRAVIS_INSTALL_DEPS != On || ./scripts/install_deps.sh - echo -n "$TRAVIS_COMMIT" > commit_hash.txt - - test "$TRAVIS_PULL_REQUESTS" != "false" || test "$TRAVIS_BRANCH" != release || echo -n > prerelease.txt # this is a proper release before_script: - test $TRAVIS_EMSCRIPTEN != On || ./scripts/build_emscripten.sh - test $TRAVIS_RELEASE != On || (mkdir -p build @@ -150,7 +151,8 @@ before_script: && cmake .. -DCMAKE_BUILD_TYPE=$TRAVIS_BUILD_TYPE && make -j2 && cd .. - && ./scripts/release.sh $ZIP_SUFFIX ) + && ./scripts/release.sh $ZIP_SUFFIX + && ./scripts/create_source_tarball.sh ) script: - test $TRAVIS_DOCS != On || ./scripts/docs.sh @@ -191,43 +193,20 @@ deploy: - release # This is the deploy target for the native build (Linux and macOS) - # which generates ZIPs per commit. We are in agreement that - # generating ZIPs per commit for the develop branch is probably - # just noise, so we only run this deployment target on 'release'. - # - # Unlike the Appveyor GitHub Releases target, the support in TravisCI - # seemingly doesn't provide a means for passing a description, tag, etc. - # In practice, we are letting the Appveyor CI do all that stuff, and - # then this deployment flow just seems to find that most recent tag, - # and just add our Linux and macOS ZIPs into the same tag, which is - # what we want to happen. But is very accidental and brittle-looking. - # - # The 'skip_cleanup' stops the workspace being cleaned out prior to - # generation of the artifacts. Strange that we should explicitly - # need to do that, but we do. - # - # Tokens in TravisCI can be generated a few different ways. Bob had - # success using the 'travis' gem, and then using that gem to - # create/edit this .travis.yml file, and then cut-and-pasting the - # good bits back out of what it generated. The gem changes all the - # whitespace and deletes comments, so cannot be used as-is. But - # it does generate an appropriate auth token. - # - # TODO - I do not know if the api_key below which work correctly - # for ethereum/solidity. I suspect not, for the same reason as - # my auth token does not work for Appveyor. I don't have enough - # permissions to enable this myself. Christian should be able to. - # - # See https://docs.travis-ci.com/user/deployment/releases - # See https://blog.travis-ci.com/2013-01-28-token-token-token/ - # See https://github.com/ethereum/webthree-umbrella/issues/658 + # which generates ZIPs per commit and the source tarball. # + # This runs for each tag that is created and adds the corresponding files. - provider: releases api_key: secure: PWH37xVBCF0XiSjl+eH7XIdkrfxZXjzvqF4PiBOnD3VnFz+odrdnIwBmCeBYTHTWF8efpp8fmzWJk2UVq1JcpyZiC+SVxO8dx91W2ia1a+wKrEQuDgkUrZBkl5IQNCv0QS81DDQhliyZEaYh8wHO/7RReyMpGpw2U2u85WkFiZ+LdlHEZPfzUeh9lxQ9n8qwFL8Rja+Q05d4cQ8zaVEtofJJT4T6DUWhc3TzuxDYxOmjwg37rC9CkGSLn6VadSh8b3j5R0SZupFsAEvBL/imBLP9r9ewoo7o4p6By3jwiIgH9yNg7LM618xbffcNaYF/KtLBi9uPHfqF7hRD4PlECz+D0PR78nQItOX5HKm1QMg5kCnghRVCA0IVjpV5fiYQnMLM7dCRv34I5b3zLpa69wQ/GLYB2FViqNUfvPeiZTEeIJ2OmATlFx8AH2JoqpY1XJknWb35+vMfa8LSiJJW++SLWeV+ncC92hrvyZ1cy3trepRRZIfyYepxHifnfdWMkddQUJk5b2WS5Fy/TJLZNPeombnpvRhUC38dsYItarKeXTc6k4oADCEDZ2rgGIcEiqRxXV11Y5xHJekLDWzUs+YJNcCuL4pnAP//LOnbnH2w9rLpwhQYSl0anCd097NivAXQJXO2JI/byIYz1kiCVQWnW6EM8+72mLOklf/Qr8k= - file: $TRAVIS_BUILD_DIR/solidity-$ZIP_SUFFIX.zip + + overwrite: true + file_glob: true + file: + - $TRAVIS_BUILD_DIR/solidity*.zip + - $TRAVIS_BUILD_DIR/solidity*tar.gz skip_cleanup: true on: - repo: ethereum/solidity - branch: release + all_branches: true + tags: true condition: $TRAVIS_RELEASE == On diff --git a/scripts/create_source_tarball.sh b/scripts/create_source_tarball.sh new file mode 100755 index 00000000..350f6b4d --- /dev/null +++ b/scripts/create_source_tarball.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env sh +# + +set -e + +REPO_ROOT="$(dirname "$0")"/.. +( + cd "$REPO_ROOT" + version=`grep -oP "PROJECT_VERSION \"?\K[0-9.]+(?=\")"? CMakeLists.txt` + commithash=`git rev-parse --short=8 HEAD` + committimestamp=`git show --format=%ci HEAD | head -n 1` + commitdate=`git show --format=%ci HEAD | head -n 1 | cut - -b1-10 | sed -e 's/-0?/./' | sed -e 's/-0?/./'` + + # file exists and has zero size -> not a prerelease + if [ -e prerelease.txt -a ! -s prerelease.txt ] + then + versionstring="$version" + else + versionstring="$version-develop-$commitdate-$commithash" + fi + + TEMPDIR=$(mktemp -d) + SOLDIR="$TEMPDIR/solidity_$versionstring/" + mkdir "$SOLDIR" + # Store the current source + git checkout-index -a --prefix="$SOLDIR" + git submodule foreach 'git checkout-index -a --prefix="'"$SOLDIR"'/$path/"' + # Store the commit hash + echo "$commithash" > "$SOLDIR/commit_hash.txt" + # Add dependencies + mkdir -p "$SOLDIR/deps/downloads/" 2>/dev/null || true + wget -O "$SOLDIR/deps/downloads/jsoncpp-1.7.7.tar.gz" https://github.com/open-source-parsers/jsoncpp/archive/1.7.7.tar.gz + tar czf "$REPO_ROOT/solidity_$versionstring.tar.gz" -C "$TEMPDIR" "solidity_$versionstring" + rm -r "$TEMPDIR" +) -- cgit From 584356e7f63bbbd14a073760e2b5091f8b0919ef Mon Sep 17 00:00:00 2001 From: Federico Bond Date: Wed, 4 Jan 2017 23:09:10 -0300 Subject: Rename contractName to typeName when parsing new expression --- libsolidity/parsing/Parser.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libsolidity/parsing/Parser.cpp b/libsolidity/parsing/Parser.cpp index f02a4a45..e26e2908 100644 --- a/libsolidity/parsing/Parser.cpp +++ b/libsolidity/parsing/Parser.cpp @@ -1153,9 +1153,9 @@ ASTPointer Parser::parseLeftHandSideExpression( else if (m_scanner->currentToken() == Token::New) { expectToken(Token::New); - ASTPointer contractName(parseTypeName(false)); - nodeFactory.setEndPositionFromNode(contractName); - expression = nodeFactory.createNode(contractName); + ASTPointer typeName(parseTypeName(false)); + nodeFactory.setEndPositionFromNode(typeName); + expression = nodeFactory.createNode(typeName); } else expression = parsePrimaryExpression(); -- cgit From 779a01faa9b18a58a133a120d6aebe1df612cb00 Mon Sep 17 00:00:00 2001 From: Yoichi Hirai Date: Fri, 6 Jan 2017 16:21:16 +0100 Subject: Changelog: document #1537 --- Changelog.md | 1 + 1 file changed, 1 insertion(+) diff --git a/Changelog.md b/Changelog.md index a82e8744..0ac0cb2f 100644 --- a/Changelog.md +++ b/Changelog.md @@ -2,6 +2,7 @@ BugFixes: * Type checker, code generator: enable access to events of base contracts' names. + * Imports: ``import ".dir/a"`` is not a relative path. Relative paths begin with directory ``.`` or ``..``. ### 0.4.7 (2016-12-15) -- cgit From 4f62980d52daa58f21dad3ef7caca5f854395c38 Mon Sep 17 00:00:00 2001 From: VoR0220 Date: Wed, 4 Jan 2017 18:29:54 -0600 Subject: added test Signed-off-by: VoR0220 --- test/libsolidity/Imports.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/libsolidity/Imports.cpp b/test/libsolidity/Imports.cpp index e3f0b281..33978b1f 100644 --- a/test/libsolidity/Imports.cpp +++ b/test/libsolidity/Imports.cpp @@ -172,6 +172,17 @@ BOOST_AUTO_TEST_CASE(filename_with_period) BOOST_CHECK(!c.compile()); } +BOOST_AUTO_TEST_CASE(context_dependent_remappings_ensure_default_and_module_preserved) +{ + CompilerStack c; + c.setRemappings(vector{"foo=vendor/foo_2.0.0", "vendor/bar:foo=vendor/foo_1.0.0", "bar=vendor/bar"}); + c.addSource("main.sol", "import \"foo/foo.sol\"; import {Bar} \"bar/bar.sol\"; contract Main is Foo2, Bar {} pragma solidity >=0.0;"); + c.addSource("vendor/bar/bar.sol", "import \"foo/foo.sol\"; contract Bar is Foo1 {} pragma solidity >=0.0;"); + c.addSource("vendor/foo_1.0.0/foo.sol", "contract Foo1 {} pragma solidity >=0.0;"); + c.addSource("vendor/foo_2.0.0/foo.sol", "contract Foo2 {} pragma solidity >=0.0;"); + BOOST_CHECK(c.compile()); +} + BOOST_AUTO_TEST_SUITE_END() } -- cgit From 6d9020b3b80aff0baf7d6e023460cfbcd930de6b Mon Sep 17 00:00:00 2001 From: VoR0220 Date: Thu, 5 Jan 2017 12:01:27 -0600 Subject: fixed test and added solution Signed-off-by: VoR0220 --- libsolidity/interface/CompilerStack.cpp | 29 ++++++++++++++++++++++++++--- test/libsolidity/Imports.cpp | 4 ++-- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/libsolidity/interface/CompilerStack.cpp b/libsolidity/interface/CompilerStack.cpp index ee55f41a..a097e4c8 100644 --- a/libsolidity/interface/CompilerStack.cpp +++ b/libsolidity/interface/CompilerStack.cpp @@ -507,21 +507,44 @@ string CompilerStack::applyRemapping(string const& _path, string const& _context return false; return std::equal(_a.begin(), _a.end(), _b.begin()); }; + // Try to find whether _a is a closer match for context _reference than _b + // Defaults to longest prefix in case of a tie. + auto isClosestContext = [](string const& _a, string const& _b, string const& _reference) + { + int a = _reference.compare(_a); + int b = _reference.compare(_b); + if (a == 0) + return true; + else if (b == 0) + return false; + else if (abs(a) == abs(b)) { + return a > 0; + } + return abs(a) < abs(b); + }; + using filepath = boost::filesystem::path; + filepath context(_context); size_t longestPrefix = 0; string longestPrefixTarget; + string currentClosestContext; + string referenceContext = context.parent_path().generic_string(); for (auto const& redir: m_remappings) { + filepath redirContext(redir.context); // Skip if we already have a closer match. - if (longestPrefix > 0 && redir.prefix.length() <= longestPrefix) + if (longestPrefix > 0 && redir.prefix.length() < longestPrefix) continue; // Skip if redir.context is not a prefix of _context - if (!isPrefixOf(redir.context, _context)) + if (!isPrefixOf(redirContext.generic_string(), _context)) continue; // Skip if the prefix does not match. if (!isPrefixOf(redir.prefix, _path)) continue; - + // Skip if there is a prefix collision and the current context is closer + if (redir.prefix.length() == longestPrefix && !isClosestContext(redirContext.generic_string(), currentClosestContext, referenceContext)) + continue; + currentClosestContext = redir.context; longestPrefix = redir.prefix.length(); longestPrefixTarget = redir.target; } diff --git a/test/libsolidity/Imports.cpp b/test/libsolidity/Imports.cpp index 33978b1f..7945f729 100644 --- a/test/libsolidity/Imports.cpp +++ b/test/libsolidity/Imports.cpp @@ -176,8 +176,8 @@ BOOST_AUTO_TEST_CASE(context_dependent_remappings_ensure_default_and_module_pres { CompilerStack c; c.setRemappings(vector{"foo=vendor/foo_2.0.0", "vendor/bar:foo=vendor/foo_1.0.0", "bar=vendor/bar"}); - c.addSource("main.sol", "import \"foo/foo.sol\"; import {Bar} \"bar/bar.sol\"; contract Main is Foo2, Bar {} pragma solidity >=0.0;"); - c.addSource("vendor/bar/bar.sol", "import \"foo/foo.sol\"; contract Bar is Foo1 {} pragma solidity >=0.0;"); + c.addSource("main.sol", "import \"foo/foo.sol\"; import {Bar} from \"bar/bar.sol\"; contract Main is Foo2, Bar {} pragma solidity >=0.0;"); + c.addSource("vendor/bar/bar.sol", "import \"foo/foo.sol\"; contract Bar {Foo1 foo;} pragma solidity >=0.0;"); c.addSource("vendor/foo_1.0.0/foo.sol", "contract Foo1 {} pragma solidity >=0.0;"); c.addSource("vendor/foo_2.0.0/foo.sol", "contract Foo2 {} pragma solidity >=0.0;"); BOOST_CHECK(c.compile()); -- cgit From 79dbd40096006eac2861dd86dd22fd0c9ab55c6c Mon Sep 17 00:00:00 2001 From: VoR0220 Date: Sat, 7 Jan 2017 12:11:43 -0600 Subject: can do this purely on length. Also made prefix filesystem string for more accurate readings. Signed-off-by: VoR0220 --- libsolidity/interface/CompilerStack.cpp | 43 ++++++++++++--------------------- 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/libsolidity/interface/CompilerStack.cpp b/libsolidity/interface/CompilerStack.cpp index a097e4c8..1a95f5a6 100644 --- a/libsolidity/interface/CompilerStack.cpp +++ b/libsolidity/interface/CompilerStack.cpp @@ -507,47 +507,36 @@ string CompilerStack::applyRemapping(string const& _path, string const& _context return false; return std::equal(_a.begin(), _a.end(), _b.begin()); }; - // Try to find whether _a is a closer match for context _reference than _b - // Defaults to longest prefix in case of a tie. - auto isClosestContext = [](string const& _a, string const& _b, string const& _reference) - { - int a = _reference.compare(_a); - int b = _reference.compare(_b); - if (a == 0) - return true; - else if (b == 0) - return false; - else if (abs(a) == abs(b)) { - return a > 0; - } - return abs(a) < abs(b); - }; using filepath = boost::filesystem::path; - filepath context(_context); size_t longestPrefix = 0; + size_t longestContext = 0; string longestPrefixTarget; - string currentClosestContext; - string referenceContext = context.parent_path().generic_string(); + for (auto const& redir: m_remappings) { filepath redirContext(redir.context); - // Skip if we already have a closer match. - if (longestPrefix > 0 && redir.prefix.length() < longestPrefix) + filepath redirPrefix(redir.prefix); + string contextFileString = redirContext.generic_string(); + string prefixFileString = redirPrefix.generic_string(); + // Skip if there is a prefix collision and the current context is closer + if (longestContext > 0 && contextFileString.length() < longestContext) continue; // Skip if redir.context is not a prefix of _context - if (!isPrefixOf(redirContext.generic_string(), _context)) + if (!isPrefixOf(contextFileString, _context)) continue; - // Skip if the prefix does not match. - if (!isPrefixOf(redir.prefix, _path)) + // Skip if we already have a closer match. + if (longestPrefix > 0 && prefixFileString.length() < longestPrefix) continue; - // Skip if there is a prefix collision and the current context is closer - if (redir.prefix.length() == longestPrefix && !isClosestContext(redirContext.generic_string(), currentClosestContext, referenceContext)) + // Skip if the prefix does not match. + if (!isPrefixOf(prefixFileString, _path)) continue; - currentClosestContext = redir.context; - longestPrefix = redir.prefix.length(); + + longestContext = contextFileString.length(); + longestPrefix = prefixFileString.length(); longestPrefixTarget = redir.target; } + string path = longestPrefixTarget; path.append(_path.begin() + longestPrefix, _path.end()); return path; -- cgit From 1cecaab9b68ce627185dca3efe793f4af949ad2b Mon Sep 17 00:00:00 2001 From: chriseth Date: Mon, 9 Jan 2017 12:14:01 +0100 Subject: Replace `` by $(). --- scripts/create_source_tarball.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts/create_source_tarball.sh b/scripts/create_source_tarball.sh index 350f6b4d..1f78e12c 100755 --- a/scripts/create_source_tarball.sh +++ b/scripts/create_source_tarball.sh @@ -6,10 +6,9 @@ set -e REPO_ROOT="$(dirname "$0")"/.. ( cd "$REPO_ROOT" - version=`grep -oP "PROJECT_VERSION \"?\K[0-9.]+(?=\")"? CMakeLists.txt` - commithash=`git rev-parse --short=8 HEAD` - committimestamp=`git show --format=%ci HEAD | head -n 1` - commitdate=`git show --format=%ci HEAD | head -n 1 | cut - -b1-10 | sed -e 's/-0?/./' | sed -e 's/-0?/./'` + version=$(grep -oP "PROJECT_VERSION \"?\K[0-9.]+(?=\")"? CMakeLists.txt) + commithash=$(git rev-parse --short=8 HEAD) + commitdate=$(git show --format=%ci HEAD | head -n 1 | cut - -b1-10 | sed -e 's/-0?/./' | sed -e 's/-0?/./') # file exists and has zero size -> not a prerelease if [ -e prerelease.txt -a ! -s prerelease.txt ] -- cgit From ceac5c5a0c75e92040749e4dca794b2a07a431f0 Mon Sep 17 00:00:00 2001 From: chriseth Date: Mon, 9 Jan 2017 15:15:30 +0100 Subject: Move explanatory sections and other small changes. --- docs/assembly.rst | 375 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 192 insertions(+), 183 deletions(-) diff --git a/docs/assembly.rst b/docs/assembly.rst index 2d868e4d..57c0bf9b 100644 --- a/docs/assembly.rst +++ b/docs/assembly.rst @@ -94,177 +94,11 @@ you really know what you are doing. } } -Standalone Assembly -=================== - -This assembly language tries to achieve several goals: - -1. Programs written in it should be readable, even if the code is generated by a compiler from Solidity. -2. The translation from assembly to bytecode should contain as few "surprises" as possible. -3. Control flow should be easy to detect to help in formal verification and optimization. - -In order to achieve the first and last goal, assembly provides high-level constructs -like ``for`` loops, ``switch`` statements and function calls. It should be possible -to write assembly programs that do not make use of explicit ``SWAP``, ``DUP``, -``JUMP`` and ``JUMPI`` statements, because the first two obfuscate the data flow -and the last two obfuscate control flow. Furthermore, functional statements of -the form ``mul(add(x, y), 7)`` are preferred over pure opcode statements like -``7 y x add mul`` because in the first form, it is much easier to see which -operand is used for which opcode. - -The second goal is achieved by introducing a desugaring phase that only removes -the higher level constructs in a very regular way and still allows inspecting -the generated low-level assembly code. The only non-local operation performed -by the assembler is name lookup of user-defined identifiers (functions, variables, ...), -which follow very simple and regular scoping rules and cleanup of local variables from the stack. - -Scoping: An identifier that is declared (label, variable, function, assembly) -is only visible in the block where it was declared (including nested blocks -inside the current block). It is not legal to access local variables across -function borders, even if they would be in scope. Shadowing is allowed, but -two identifiers with the same name cannot be declared in the same block. -Local variables cannot be accessed before they were declared, but labels, -functions and assemblies can. Assemblies are special blocks that are used -for e.g. returning runtime code or creating contracts. No identifier from an -outer assembly is visible in a sub-assembly. - -If control flow passes over the end of a block, pop instructions are inserted -that match the number of local variables declared in that block, unless the -``}`` is directly preceded by an opcode that does not have a continuing control -flow path. Whenever a local variable is referenced, the code generator needs -to know its current relative position in the stack and thus it needs to -keep track of the current so-called stack height. -At the end of a block, this implicit stack height is always reduced by the number -of local variables whether ther is a continuing control flow or not. - -This means that the stack height before and after the block should be the same. -If this is not the case, a warning is issued, -unless the last instruction in the block did not have a continuing control flow path. - -Why do we use higher-level constructs like ``switch``, ``for`` and functions: - -Using ``switch``, ``for`` and functions, it should be possible to write -complex code without using ``jump`` or ``jumpi`` manually. This makes it much -easier to analyze the control flow, which allows for improved formal -verification and optimization. - -Furthermore, if manual jumps are allowed, computing the stack height is rather complicated. -The position of all local variables on the stack needs to be known, otherwise -neither references to local variables nor removing local variables automatically -from the stack at the end of a block will work properly. Because of that, -every label that is preceded by an instruction that ends or diverts control flow -should be annotated with the current stack layout. This annotation is performed -automatically during the desugaring phase. - -Example: - -We will follow an example compilation from Solidity to desugared assembly. -We consider the runtime bytecode of the following Solidity program:: - - contract C { - function f(uint x) returns (uint y) { - y = 1; - for (uint i = 0; i < x; i++) - y = 2 * y; - } - } - -The following assembly will be generated:: - - { - mstore(0x40, 0x60) // store the "free memory pointer" - // function dispatcher - switch div(calldataload(0), exp(2, 226)) - case 0xb3de648b: { - let (r,) = f(calldataload(4)) - let ret := $allocate(0x20) - mstore(ret, r) - return(ret, 0x20) - } - default: { jump(invalidJumpLabel) } - // memory allocator - function $allocate(size) -> (pos) { - pos := mload(0x40) - mstore(0x40, add(pos, size)) - } - // the contract function - function f(x) -> (y) { - y := 1 - for { let i := 0 } lt(i, x) { i := add(i, 1) } { - y := mul(2, y) - } - } - } - -After the desugaring phase it looks as follows:: - - { - mstore(0x40, 0x60) - { - let $0 := div(calldataload(0), exp(2, 226)) - jumpi($case1, eq($0, 0xb3de648b)) - jump($caseDefault) - $case1: - { - // the function call - we put return label and arguments on the stack - $ret1 calldataload(4) jump($fun_f) - $ret1 [r]: // a label with a [...]-annotation resets the stack height - // to "current block + number of local variables". It also - // introduces a variable, r: - // r is at top of stack, $0 is below (from enclosing block) - $ret2 0x20 jump($fun_allocate) - $ret2 [ret]: // stack here: $0, r, ret (top) - mstore(ret, r) - return(ret, 0x20) - // although it is useless, the jump is automatically inserted, - // since the desugaring process does not analyze control-flow - jump($endswitch) - } - $caseDefault: - { - jump(invalidJumpLabel) - jump($endswitch) - } - $endswitch: - } - jump($afterFunction) - $fun_allocate: - { - $start[$retpos, size]: - // output variables live in the same scope as the arguments. - let pos := 0 - { - pos := mload(0x40) - mstore(0x40, add(pos, size)) - } - swap1 pop swap1 jump - } - $fun_f: - { - start [$retpos, x]: - let y := 0 - { - let i := 0 - $for_begin: - jumpi($for_end, iszero(lt(i, x))) - { - y := mul(2, y) - } - $for_continue: - { i := add(i, 1) } - jump($for_begin) - $for_end: - } // Here, a pop instruction is inserted for i - swap1 pop swap1 jump - } - $afterFunction: - stop - } Syntax ------ -Inline assembly parses comments, literals and identifiers exactly as Solidity, so you can use the +Assembly parses comments, literals and identifiers exactly as Solidity, so you can use the usual ``//`` and ``/* */`` comments. Inline assembly is marked by ``assembly { ... }`` and inside these curly braces, the following can be used (see the later sections for more details) @@ -273,7 +107,7 @@ these curly braces, the following can be used (see the later sections for more d - opcode in functional style, e.g. ``add(1, mlod(0))`` - labels, e.g. ``name:`` - variable declarations, e.g. ``let x := 7`` or ``let x := add(y, 3)`` - - identifiers (externals, labels or assembly-local variables), e.g. ``jump(name)``, ``3 x add`` + - identifiers (labels or assembly-local variables and externals if used as inline assembly), e.g. ``jump(name)``, ``3 x add`` - assignments (in "instruction style"), e.g. ``3 =: x`` - assignments in functional style, e.g. ``x := add(y, 3)`` - blocks where local variables are scoped inside, e.g. ``{ let x := 3 { let y := add(x, 1) } }`` @@ -535,7 +369,7 @@ jumps easier. The following code computes an element in the Fibonacci series. Please note that automatically accessing stack variables can only work if the assembler knows the current stack height. This fails to work if the jump source -and target have different stack heights. It is still fine to use such jumps, +and target have different stack heights. It is still fine to use such jumps, but you should just not access any stack variables (even assembly variables) in that case. Furthermore, the stack height analyser goes through the code opcode by opcode @@ -593,11 +427,12 @@ Assignments are possible to assembly-local variables and to function-local variables. Take care that when you assign to variables that point to memory or storage, you will only change the pointer and not the data. -There are two kinds of assignments: Functional-style and instruction-style. +There are two kinds of assignments: functional-style and instruction-style. For functional-style assignments (``variable := value``), you need to provide a value in a functional-style expression that results in exactly one stack value and for instruction-style (``=: variable``), the value is just taken from the stack top. -For both ways, the colon points to the name of the variable. +For both ways, the colon points to the name of the variable. The assignment +is performed by replacing the variable's value on the stack by the new value. .. code:: @@ -615,7 +450,7 @@ You can use a switch statement as a very basic version of "if/else". It takes the value of an expression and compares it to several constants. The branch corresponding to the matching constant is taken. Contrary to the error-prone behaviour of some programming languages, control flow does -not continue from one case to the next. There is a fallback or default +not continue from one case to the next. There can be a fallback or default case called ``default``. .. code:: @@ -623,8 +458,12 @@ case called ``default``. assembly { let x := 0 switch calldataload(4) - case 0: { x := calldataload(0x24) } - default: { x := calldataload(0x44) } + case 0: { + x := calldataload(0x24) + } + default: { + x := calldataload(0x44) + } sstore(0, div(x, 2)) } @@ -675,13 +514,13 @@ The following example implements the power function by square-and-multiply. assembly { function power(base, exponent) -> (result) { switch exponent - 0: { result := 1 } - 1: { result := base } - default: { - result := power(mul(base, base), div(exponent, 2)) - switch mod(exponent, 2) - 1: { result := mul(base, result) } - } + 0: { result := 1 } + 1: { result := base } + default: { + result := power(mul(base, base), div(exponent, 2)) + switch mod(exponent, 2) + 1: { result := mul(base, result) } + } } } @@ -724,8 +563,175 @@ first slot of the array and then only the array elements follow. please do not rely on that. -Specification -============= +Standalone Assembly +=================== + +The assembly language described as inline assembly above can also be used +standalone and in fact, the plan is to use it as an intermediate language +for the Solidity compiler. In this form, it tries to achieve several goals: + +1. Programs written in it should be readable, even if the code is generated by a compiler from Solidity. +2. The translation from assembly to bytecode should contain as few "surprises" as possible. +3. Control flow should be easy to detect to help in formal verification and optimization. + +In order to achieve the first and last goal, assembly provides high-level constructs +like ``for`` loops, ``switch`` statements and function calls. It should be possible +to write assembly programs that do not make use of explicit ``SWAP``, ``DUP``, +``JUMP`` and ``JUMPI`` statements, because the first two obfuscate the data flow +and the last two obfuscate control flow. Furthermore, functional statements of +the form ``mul(add(x, y), 7)`` are preferred over pure opcode statements like +``7 y x add mul`` because in the first form, it is much easier to see which +operand is used for which opcode. + +The second goal is achieved by introducing a desugaring phase that only removes +the higher level constructs in a very regular way and still allows inspecting +the generated low-level assembly code. The only non-local operation performed +by the assembler is name lookup of user-defined identifiers (functions, variables, ...), +which follow very simple and regular scoping rules and cleanup of local variables from the stack. + +Scoping: An identifier that is declared (label, variable, function, assembly) +is only visible in the block where it was declared (including nested blocks +inside the current block). It is not legal to access local variables across +function borders, even if they would be in scope. Shadowing is allowed, but +two identifiers with the same name cannot be declared in the same block. +Local variables cannot be accessed before they were declared, but labels, +functions and assemblies can. Assemblies are special blocks that are used +for e.g. returning runtime code or creating contracts. No identifier from an +outer assembly is visible in a sub-assembly. + +If control flow passes over the end of a block, pop instructions are inserted +that match the number of local variables declared in that block, unless the +``}`` is directly preceded by an opcode that does not have a continuing control +flow path. Whenever a local variable is referenced, the code generator needs +to know its current relative position in the stack and thus it needs to +keep track of the current so-called stack height. +At the end of a block, this implicit stack height is always reduced by the number +of local variables whether ther is a continuing control flow or not. + +This means that the stack height before and after the block should be the same. +If this is not the case, a warning is issued, +unless the last instruction in the block did not have a continuing control flow path. + +Why do we use higher-level constructs like ``switch``, ``for`` and functions: + +Using ``switch``, ``for`` and functions, it should be possible to write +complex code without using ``jump`` or ``jumpi`` manually. This makes it much +easier to analyze the control flow, which allows for improved formal +verification and optimization. + +Furthermore, if manual jumps are allowed, computing the stack height is rather complicated. +The position of all local variables on the stack needs to be known, otherwise +neither references to local variables nor removing local variables automatically +from the stack at the end of a block will work properly. Because of that, +every label that is preceded by an instruction that ends or diverts control flow +should be annotated with the current stack layout. This annotation is performed +automatically during the desugaring phase. + +Example: + +We will follow an example compilation from Solidity to desugared assembly. +We consider the runtime bytecode of the following Solidity program:: + + contract C { + function f(uint x) returns (uint y) { + y = 1; + for (uint i = 0; i < x; i++) + y = 2 * y; + } + } + +The following assembly will be generated:: + + { + mstore(0x40, 0x60) // store the "free memory pointer" + // function dispatcher + switch div(calldataload(0), exp(2, 226)) + case 0xb3de648b: { + let (r) = f(calldataload(4)) + let ret := $allocate(0x20) + mstore(ret, r) + return(ret, 0x20) + } + default: { jump(invalidJumpLabel) } + // memory allocator + function $allocate(size) -> (pos) { + pos := mload(0x40) + mstore(0x40, add(pos, size)) + } + // the contract function + function f(x) -> (y) { + y := 1 + for { let i := 0 } lt(i, x) { i := add(i, 1) } { + y := mul(2, y) + } + } + } + +After the desugaring phase it looks as follows:: + + { + mstore(0x40, 0x60) + { + let $0 := div(calldataload(0), exp(2, 226)) + jumpi($case1, eq($0, 0xb3de648b)) + jump($caseDefault) + $case1: + { + // the function call - we put return label and arguments on the stack + $ret1 calldataload(4) jump($fun_f) + $ret1 [r]: // a label with a [...]-annotation resets the stack height + // to "current block + number of local variables". It also + // introduces a variable, r: + // r is at top of stack, $0 is below (from enclosing block) + $ret2 0x20 jump($fun_allocate) + $ret2 [ret]: // stack here: $0, r, ret (top) + mstore(ret, r) + return(ret, 0x20) + // although it is useless, the jump is automatically inserted, + // since the desugaring process does not analyze control-flow + jump($endswitch) + } + $caseDefault: + { + jump(invalidJumpLabel) + jump($endswitch) + } + $endswitch: + } + jump($afterFunction) + $fun_allocate: + { + $start[$retpos, size]: + // output variables live in the same scope as the arguments. + let pos := 0 + { + pos := mload(0x40) + mstore(0x40, add(pos, size)) + } + swap1 pop swap1 jump + } + $fun_f: + { + start [$retpos, x]: + let y := 0 + { + let i := 0 + $for_begin: + jumpi($for_end, iszero(lt(i, x))) + { + y := mul(2, y) + } + $for_continue: + { i := add(i, 1) } + jump($for_begin) + $for_end: + } // Here, a pop instruction is inserted for i + swap1 pop swap1 jump + } + $afterFunction: + stop + } + Assembly happens in four stages: @@ -734,6 +740,9 @@ Assembly happens in four stages: 3. Opcode stream generation 4. Bytecode generation +We will specify steps one to three in a pseudo-formal way. More formal +specifications will follow. + Parsing / Grammar ----------------- -- cgit From 8ace851831f63035174ae086bb415a0d2efb8277 Mon Sep 17 00:00:00 2001 From: VoR0220 Date: Tue, 10 Jan 2017 07:15:48 -0600 Subject: much smaller helper function Signed-off-by: VoR0220 --- libsolidity/interface/CompilerStack.cpp | 6 ++---- libsolidity/interface/CompilerStack.h | 4 +++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libsolidity/interface/CompilerStack.cpp b/libsolidity/interface/CompilerStack.cpp index 1a95f5a6..094360a3 100644 --- a/libsolidity/interface/CompilerStack.cpp +++ b/libsolidity/interface/CompilerStack.cpp @@ -515,10 +515,8 @@ string CompilerStack::applyRemapping(string const& _path, string const& _context for (auto const& redir: m_remappings) { - filepath redirContext(redir.context); - filepath redirPrefix(redir.prefix); - string contextFileString = redirContext.generic_string(); - string prefixFileString = redirPrefix.generic_string(); + string contextFileString = sanitizePath(redir.context); + string prefixFileString = sanitizePath(redir.prefix); // Skip if there is a prefix collision and the current context is closer if (longestContext > 0 && contextFileString.length() < longestContext) continue; diff --git a/libsolidity/interface/CompilerStack.h b/libsolidity/interface/CompilerStack.h index f98a457a..ef0aa6fe 100644 --- a/libsolidity/interface/CompilerStack.h +++ b/libsolidity/interface/CompilerStack.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -239,7 +240,8 @@ private: ContractDefinition const& _contract, std::map& _compiledContracts ); - + /// Helper function to return path converted strings. + std::string sanitizePath(std::string const& _path) { return boost::filesystem::path(_path).generic_string(); } void link(); Contract const& contract(std::string const& _contractName = "") const; -- cgit From d61b911a3a02962d5c38ad8b77ab650352529659 Mon Sep 17 00:00:00 2001 From: Federico Bond Date: Wed, 11 Jan 2017 00:51:28 -0300 Subject: grammar.txt: Move StorageLocation? up to VariableDeclaration --- docs/grammar.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/grammar.txt b/docs/grammar.txt index b5d2b780..2300dd00 100644 --- a/docs/grammar.txt +++ b/docs/grammar.txt @@ -35,10 +35,10 @@ TypeNameList = '(' ( TypeName (',' TypeName )* )? ')' // semantic restriction: mappings and structs (recursively) containing mappings // are not allowed in argument lists -VariableDeclaration = TypeName Identifier +VariableDeclaration = TypeName StorageLocation? Identifier TypeName = ElementaryTypeName - | UserDefinedTypeName StorageLocation? + | UserDefinedTypeName | Mapping | ArrayTypeName | FunctionTypeName @@ -46,7 +46,7 @@ TypeName = ElementaryTypeName UserDefinedTypeName = Identifier ( '.' Identifier )* Mapping = 'mapping' '(' ElementaryTypeName '=>' TypeName ')' -ArrayTypeName = TypeName '[' Expression? ']' StorageLocation? +ArrayTypeName = TypeName '[' Expression? ']' FunctionTypeName = 'function' TypeNameList ( 'internal' | 'external' | 'constant' | 'payable' )* ( 'returns' TypeNameList )? StorageLocation = 'memory' | 'storage' -- cgit From c15b0fb596a3466a8f31f9b4f9cd0289091264aa Mon Sep 17 00:00:00 2001 From: Federico Bond Date: Wed, 11 Jan 2017 01:33:17 -0300 Subject: grammar.txt: Add named function arguments --- docs/grammar.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/grammar.txt b/docs/grammar.txt index 2300dd00..f45a62e7 100644 --- a/docs/grammar.txt +++ b/docs/grammar.txt @@ -97,7 +97,13 @@ PrimaryExpression = Identifier | StringLiteral | ElementaryTypeNameExpression -FunctionCall = ( PrimaryExpression | NewExpression | TypeName ) ( ( '.' Identifier ) | ( '[' Expression ']' ) )* '(' Expression? ( ',' Expression )* ')' +ExpressionList = Expression ( ',' Expression )* +NameValueList = Identifier ':' Expression ( ',' Identifier ':' Expression )* + +FunctionCall = ( PrimaryExpression | NewExpression | TypeName ) ( ( '.' Identifier ) | ( '[' Expression ']' ) )* '(' FunctionCallArguments ')' +FunctionCallArguments = '{' NameValueList? '}' + | ExpressionList? + NewExpression = 'new' Identifier MemberAccess = Expression '.' Identifier IndexAccess = Expression '[' Expression? ']' -- cgit From 26eff0e0d8c0c34a614e6c7151e248168f72cab7 Mon Sep 17 00:00:00 2001 From: Federico Bond Date: Wed, 11 Jan 2017 01:35:29 -0300 Subject: grammar.txt: Replace Identifier for Typename in NewExpression --- docs/grammar.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/grammar.txt b/docs/grammar.txt index f45a62e7..62b4a021 100644 --- a/docs/grammar.txt +++ b/docs/grammar.txt @@ -104,7 +104,7 @@ FunctionCall = ( PrimaryExpression | NewExpression | TypeName ) ( ( '.' Identifi FunctionCallArguments = '{' NameValueList? '}' | ExpressionList? -NewExpression = 'new' Identifier +NewExpression = 'new' TypeName MemberAccess = Expression '.' Identifier IndexAccess = Expression '[' Expression? ']' -- cgit From e02270bbb4a9f151bc25e0e0f96f1e4505542de5 Mon Sep 17 00:00:00 2001 From: VoR0220 Date: Wed, 11 Jan 2017 09:03:41 -0600 Subject: fixed unused filepath bug Signed-off-by: VoR0220 --- libsolidity/interface/CompilerStack.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libsolidity/interface/CompilerStack.cpp b/libsolidity/interface/CompilerStack.cpp index 094360a3..30abad07 100644 --- a/libsolidity/interface/CompilerStack.cpp +++ b/libsolidity/interface/CompilerStack.cpp @@ -508,7 +508,6 @@ string CompilerStack::applyRemapping(string const& _path, string const& _context return std::equal(_a.begin(), _a.end(), _b.begin()); }; - using filepath = boost::filesystem::path; size_t longestPrefix = 0; size_t longestContext = 0; string longestPrefixTarget; @@ -524,7 +523,7 @@ string CompilerStack::applyRemapping(string const& _path, string const& _context if (!isPrefixOf(contextFileString, _context)) continue; // Skip if we already have a closer match. - if (longestPrefix > 0 && prefixFileString.length() < longestPrefix) + if (prefixFileString.length() < longestPrefix) continue; // Skip if the prefix does not match. if (!isPrefixOf(prefixFileString, _path)) -- cgit From 643182865bc70d654512c45b90363c838e09bef6 Mon Sep 17 00:00:00 2001 From: chriseth Date: Wed, 11 Jan 2017 16:20:13 +0100 Subject: Changelog entry about assembly output --- Changelog.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Changelog.md b/Changelog.md index 0ac0cb2f..8a005eed 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,5 +1,8 @@ ### 0.4.8 (unreleased) +Features: + * Output: Print assembly in new standardized Solidity assembly format. + BugFixes: * Type checker, code generator: enable access to events of base contracts' names. * Imports: ``import ".dir/a"`` is not a relative path. Relative paths begin with directory ``.`` or ``..``. -- cgit From e96c32a072160428a93720a21c51cd8670d5e664 Mon Sep 17 00:00:00 2001 From: VoR0220 Date: Wed, 11 Jan 2017 09:32:07 -0600 Subject: changelog entry Signed-off-by: VoR0220 --- Changelog.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Changelog.md b/Changelog.md index 0ac0cb2f..c06313ba 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,6 +1,8 @@ ### 0.4.8 (unreleased) BugFixes: + * Remappings: a=b would overwrite c:a=d. This has now been fixed to all modules except + c using b as their target, with c using d as the target. * Type checker, code generator: enable access to events of base contracts' names. * Imports: ``import ".dir/a"`` is not a relative path. Relative paths begin with directory ``.`` or ``..``. -- cgit From 4585bfdce7716cd4837f71b565bb9a6dff8c2d7e Mon Sep 17 00:00:00 2001 From: VoR0220 Date: Wed, 11 Jan 2017 10:56:35 -0600 Subject: cleanup Signed-off-by: VoR0220 fixup Signed-off-by: VoR0220 --- Changelog.md | 3 +-- libsolidity/interface/CompilerStack.cpp | 24 ++++++++++++------------ libsolidity/interface/CompilerStack.h | 5 +++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Changelog.md b/Changelog.md index c06313ba..52f86d40 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,8 +1,7 @@ ### 0.4.8 (unreleased) BugFixes: - * Remappings: a=b would overwrite c:a=d. This has now been fixed to all modules except - c using b as their target, with c using d as the target. + * Remappings: Prefer longer context over longer prefix. * Type checker, code generator: enable access to events of base contracts' names. * Imports: ``import ".dir/a"`` is not a relative path. Relative paths begin with directory ``.`` or ``..``. diff --git a/libsolidity/interface/CompilerStack.cpp b/libsolidity/interface/CompilerStack.cpp index 30abad07..1e571cae 100644 --- a/libsolidity/interface/CompilerStack.cpp +++ b/libsolidity/interface/CompilerStack.cpp @@ -510,28 +510,28 @@ string CompilerStack::applyRemapping(string const& _path, string const& _context size_t longestPrefix = 0; size_t longestContext = 0; - string longestPrefixTarget; + string bestMatchTarget; for (auto const& redir: m_remappings) { - string contextFileString = sanitizePath(redir.context); - string prefixFileString = sanitizePath(redir.prefix); - // Skip if there is a prefix collision and the current context is closer - if (longestContext > 0 && contextFileString.length() < longestContext) + string context = sanitizePath(redir.context); + string prefix = sanitizePath(redir.prefix); + // Skip if current context is closer + if (context.length() < longestContext) continue; // Skip if redir.context is not a prefix of _context - if (!isPrefixOf(contextFileString, _context)) + if (!isPrefixOf(context, _context)) continue; - // Skip if we already have a closer match. - if (prefixFileString.length() < longestPrefix) + // Skip if we already have a closer prefix match. + if (prefix.length() < longestPrefix) continue; // Skip if the prefix does not match. - if (!isPrefixOf(prefixFileString, _path)) + if (!isPrefixOf(prefix, _path)) continue; - longestContext = contextFileString.length(); - longestPrefix = prefixFileString.length(); - longestPrefixTarget = redir.target; + longestContext = context.length(); + longestPrefix = prefix.length(); + bestMatchTarget = redir.target; } string path = longestPrefixTarget; diff --git a/libsolidity/interface/CompilerStack.h b/libsolidity/interface/CompilerStack.h index ef0aa6fe..d49a8df1 100644 --- a/libsolidity/interface/CompilerStack.h +++ b/libsolidity/interface/CompilerStack.h @@ -235,13 +235,14 @@ private: bool checkLibraryNameClashes(); /// @returns the absolute path corresponding to @a _path relative to @a _reference. std::string absolutePath(std::string const& _path, std::string const& _reference) const; + /// Helper function to return path converted strings. + std::string sanitizePath(std::string const& _path) const { return boost::filesystem::path(_path).generic_string(); } + /// Compile a single contract and put the result in @a _compiledContracts. void compileContract( ContractDefinition const& _contract, std::map& _compiledContracts ); - /// Helper function to return path converted strings. - std::string sanitizePath(std::string const& _path) { return boost::filesystem::path(_path).generic_string(); } void link(); Contract const& contract(std::string const& _contractName = "") const; -- cgit From bff76c1ca0433f88874fded8990f39292483d9f0 Mon Sep 17 00:00:00 2001 From: Yoichi Hirai Date: Tue, 1 Nov 2016 17:18:00 +0100 Subject: analysis: report errors when inheritance causes collision --- libsolidity/analysis/NameAndTypeResolver.cpp | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/libsolidity/analysis/NameAndTypeResolver.cpp b/libsolidity/analysis/NameAndTypeResolver.cpp index 2a33a501..fc107ba0 100644 --- a/libsolidity/analysis/NameAndTypeResolver.cpp +++ b/libsolidity/analysis/NameAndTypeResolver.cpp @@ -289,7 +289,31 @@ void NameAndTypeResolver::importInheritedScope(ContractDefinition const& _base) for (auto const& declaration: nameAndDeclaration.second) // Import if it was declared in the base, is not the constructor and is visible in derived classes if (declaration->scope() == &_base && declaration->isVisibleInDerivedContracts()) - m_currentScope->registerDeclaration(*declaration); + if (!m_currentScope->registerDeclaration(*declaration)) + { + SourceLocation firstDeclarationLocation; + SourceLocation secondDeclarationLocation; + Declaration const* conflictingDeclaration = m_currentScope->conflictingDeclaration(*declaration); + solAssert(conflictingDeclaration, ""); + + if (declaration->location().start < conflictingDeclaration->location().start) + { + firstDeclarationLocation = declaration->location(); + secondDeclarationLocation = conflictingDeclaration->location(); + } + else + { + firstDeclarationLocation = conflictingDeclaration->location(); + secondDeclarationLocation = declaration->location(); + } + + reportDeclarationError( + secondDeclarationLocation, + "Identifier already declared.", + firstDeclarationLocation, + "The previous declaration is here:" + ); + } } void NameAndTypeResolver::linearizeBaseContracts(ContractDefinition& _contract) -- cgit From 42b672617392ecb294447f73fd011092cd58702b Mon Sep 17 00:00:00 2001 From: Yoichi Hirai Date: Fri, 2 Dec 2016 16:06:01 +0100 Subject: analysis: allow some shadowings explicitly --- libsolidity/analysis/NameAndTypeResolver.cpp | 8 ++++ test/libsolidity/SolidityEndToEndTest.cpp | 54 ---------------------- test/libsolidity/SolidityNameAndTypeResolution.cpp | 23 ++++++++- 3 files changed, 29 insertions(+), 56 deletions(-) diff --git a/libsolidity/analysis/NameAndTypeResolver.cpp b/libsolidity/analysis/NameAndTypeResolver.cpp index fc107ba0..002ff811 100644 --- a/libsolidity/analysis/NameAndTypeResolver.cpp +++ b/libsolidity/analysis/NameAndTypeResolver.cpp @@ -296,6 +296,14 @@ void NameAndTypeResolver::importInheritedScope(ContractDefinition const& _base) Declaration const* conflictingDeclaration = m_currentScope->conflictingDeclaration(*declaration); solAssert(conflictingDeclaration, ""); + // Usual shadowing is not an error + if (dynamic_cast(declaration) && dynamic_cast(conflictingDeclaration)) + continue; + + // Usual shadowing is not an error + if (dynamic_cast(declaration) && dynamic_cast(conflictingDeclaration)) + continue; + if (declaration->location().start < conflictingDeclaration->location().start) { firstDeclarationLocation = declaration->location(); diff --git a/test/libsolidity/SolidityEndToEndTest.cpp b/test/libsolidity/SolidityEndToEndTest.cpp index d8539524..19161831 100644 --- a/test/libsolidity/SolidityEndToEndTest.cpp +++ b/test/libsolidity/SolidityEndToEndTest.cpp @@ -4869,60 +4869,6 @@ BOOST_AUTO_TEST_CASE(proper_order_of_overwriting_of_attributes) BOOST_CHECK(callContractFunction("ok()") == encodeArgs(false)); } -BOOST_AUTO_TEST_CASE(proper_overwriting_accessor_by_function) -{ - // bug #1798 - char const* sourceCode = R"( - contract attribute { - bool ok = false; - } - contract func { - function ok() returns (bool) { return true; } - } - - contract attr_func is attribute, func { - function checkOk() returns (bool) { return ok(); } - } - contract func_attr is func, attribute { - function checkOk() returns (bool) { return ok; } - } - )"; - compileAndRun(sourceCode, 0, "attr_func"); - BOOST_CHECK(callContractFunction("ok()") == encodeArgs(true)); - compileAndRun(sourceCode, 0, "func_attr"); - BOOST_CHECK(callContractFunction("checkOk()") == encodeArgs(false)); -} - - -BOOST_AUTO_TEST_CASE(overwriting_inheritance) -{ - // bug #1798 - char const* sourceCode = R"( - contract A { - function ok() returns (uint) { return 1; } - } - contract B { - function ok() returns (uint) { return 2; } - } - contract C { - uint ok = 6; - } - contract AB is A, B { - function ok() returns (uint) { return 4; } - } - contract reversedE is C, AB { - function checkOk() returns (uint) { return ok(); } - } - contract E is AB, C { - function checkOk() returns (uint) { return ok; } - } - )"; - compileAndRun(sourceCode, 0, "reversedE"); - BOOST_CHECK(callContractFunction("checkOk()") == encodeArgs(4)); - compileAndRun(sourceCode, 0, "E"); - BOOST_CHECK(callContractFunction("checkOk()") == encodeArgs(6)); -} - BOOST_AUTO_TEST_CASE(struct_assign_reference_to_struct) { char const* sourceCode = R"( diff --git a/test/libsolidity/SolidityNameAndTypeResolution.cpp b/test/libsolidity/SolidityNameAndTypeResolution.cpp index 576421fd..11ce9997 100644 --- a/test/libsolidity/SolidityNameAndTypeResolution.cpp +++ b/test/libsolidity/SolidityNameAndTypeResolution.cpp @@ -1056,7 +1056,7 @@ BOOST_AUTO_TEST_CASE(modifier_overrides_function) contract A { modifier mod(uint a) { _; } } contract B is A { function mod(uint a) { } } )"; - CHECK_ERROR(text, TypeError, ""); + CHECK_ERROR(text, DeclarationError, ""); } BOOST_AUTO_TEST_CASE(function_overrides_modifier) @@ -1065,7 +1065,7 @@ BOOST_AUTO_TEST_CASE(function_overrides_modifier) contract A { function mod(uint a) { } } contract B is A { modifier mod(uint a) { _; } } )"; - CHECK_ERROR(text, TypeError, ""); + CHECK_ERROR(text, DeclarationError, ""); } BOOST_AUTO_TEST_CASE(modifier_returns_value) @@ -4304,6 +4304,25 @@ BOOST_AUTO_TEST_CASE(illegal_override_payable_nonpayable) CHECK_ERROR(text, TypeError, ""); } +BOOST_AUTO_TEST_CASE(function_variable_mixin) +{ + // bug #1798 (cpp-ethereum), related to #1286 (solidity) + char const* text = R"( + contract attribute { + bool ok = false; + } + contract func { + function ok() returns (bool) { return true; } + } + + contract attr_func is attribute, func { + function checkOk() returns (bool) { return ok(); } + } + )"; + CHECK_ERROR(text, DeclarationError, ""); +} + + BOOST_AUTO_TEST_CASE(payable_constant_conflict) { char const* text = R"( -- cgit From 91d4e8e0bab6f5285103af6ef84a7813cfbb61a2 Mon Sep 17 00:00:00 2001 From: Yoichi Hirai Date: Fri, 2 Dec 2016 16:24:53 +0100 Subject: analysis: changes necessary to compile std/StandardToken.sol --- libsolidity/analysis/DeclarationContainer.cpp | 13 +++++++-- libsolidity/analysis/NameAndTypeResolver.cpp | 38 +++++++++++++++++++++------ libsolidity/analysis/TypeChecker.cpp | 19 ++++++++++++-- 3 files changed, 58 insertions(+), 12 deletions(-) diff --git a/libsolidity/analysis/DeclarationContainer.cpp b/libsolidity/analysis/DeclarationContainer.cpp index 1599b83a..f8c12c5b 100644 --- a/libsolidity/analysis/DeclarationContainer.cpp +++ b/libsolidity/analysis/DeclarationContainer.cpp @@ -44,10 +44,19 @@ Declaration const* DeclarationContainer::conflictingDeclaration( if (dynamic_cast(&_declaration)) { - // check that all other declarations with the same name are functions + // check that all other declarations with the same name are functions or a public state variable for (Declaration const* declaration: declarations) - if (!dynamic_cast(declaration)) + { + if (dynamic_cast(declaration)) + continue; + if (auto variableDeclaration = dynamic_cast(declaration)) + { + if (variableDeclaration->isStateVariable() && !variableDeclaration->isConstant() && variableDeclaration->isPublic()) + continue; return declaration; + } + return declaration; + } } else if (declarations.size() == 1 && declarations.front() == &_declaration) return nullptr; diff --git a/libsolidity/analysis/NameAndTypeResolver.cpp b/libsolidity/analysis/NameAndTypeResolver.cpp index 002ff811..ee05910c 100644 --- a/libsolidity/analysis/NameAndTypeResolver.cpp +++ b/libsolidity/analysis/NameAndTypeResolver.cpp @@ -260,20 +260,42 @@ vector NameAndTypeResolver::cleanedDeclarations( for (auto it = _declarations.begin(); it != _declarations.end(); ++it) { solAssert(*it, ""); - // the declaration is functionDefinition while declarations > 1 - FunctionDefinition const& functionDefinition = dynamic_cast(**it); - FunctionType functionType(functionDefinition); - for (auto parameter: functionType.parameterTypes() + functionType.returnParameterTypes()) - if (!parameter) - reportFatalDeclarationError(_identifier.location(), "Function type can not be used in this context"); + // the declaration is functionDefinition or a VariableDeclaration while declarations > 1 + solAssert(dynamic_cast(*it) || dynamic_cast(*it), + "Found overloading involving something not a function or a variable"); + + shared_ptr functionType {}; + + if (FunctionDefinition const* functionDefinition = dynamic_cast(*it)) + { + functionType = make_shared(*functionDefinition); + for (auto parameter: functionType->parameterTypes() + functionType->returnParameterTypes()) + if (!parameter) + reportFatalDeclarationError(_identifier.location(), "Function type can not be used in this context"); + } + else + { + VariableDeclaration const* variableDeclaration = dynamic_cast(*it); + functionType = make_shared(*variableDeclaration); + } + solAssert(functionType, "failed to determine the function type of the overloaded"); if (uniqueFunctions.end() == find_if( uniqueFunctions.begin(), uniqueFunctions.end(), [&](Declaration const* d) { - FunctionType newFunctionType(dynamic_cast(*d)); - return functionType.hasEqualArgumentTypes(newFunctionType); + if (FunctionDefinition const* functionDefinition = dynamic_cast(d)) + { + FunctionType const newFunctionType(*functionDefinition); + return functionType->hasEqualArgumentTypes(newFunctionType); + } + else if (VariableDeclaration const* variableDeclaration = dynamic_cast(d)) + { + FunctionType const newFunctionType(*variableDeclaration); + return functionType->hasEqualArgumentTypes(newFunctionType); + } + return false; // to make compiler happy } )) uniqueFunctions.push_back(*it); diff --git a/libsolidity/analysis/TypeChecker.cpp b/libsolidity/analysis/TypeChecker.cpp index e414e27c..67c8ac17 100644 --- a/libsolidity/analysis/TypeChecker.cpp +++ b/libsolidity/analysis/TypeChecker.cpp @@ -1500,8 +1500,23 @@ bool TypeChecker::visit(Identifier const& _identifier) if (!annotation.referencedDeclaration) { if (!annotation.argumentTypes) - fatalTypeError(_identifier.location(), "Unable to determine overloaded type."); - if (annotation.overloadedDeclarations.empty()) + { + // The identifier should be a public state variable shadowing other functions + vector candidates; + + for (Declaration const* declaration: annotation.overloadedDeclarations) + { + if (VariableDeclaration const* variableDeclaration = dynamic_cast(declaration)) + candidates.push_back(declaration); + } + if (candidates.empty()) + fatalTypeError(_identifier.location(), "No matching declaration found after variable lookup."); + else if (candidates.size() == 1) + annotation.referencedDeclaration = candidates.front(); + else + fatalTypeError(_identifier.location(), "No unique declaration found after variable lookup."); + } + else if (annotation.overloadedDeclarations.empty()) fatalTypeError(_identifier.location(), "No candidates for overload resolution found."); else if (annotation.overloadedDeclarations.size() == 1) annotation.referencedDeclaration = *annotation.overloadedDeclarations.begin(); -- cgit From 0216f34010343af6285305b59d47db62261c03c6 Mon Sep 17 00:00:00 2001 From: Yoichi Hirai Date: Mon, 5 Dec 2016 12:03:07 +0100 Subject: analysis: avoid emscripten build failure --- libsolidity/analysis/NameAndTypeResolver.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libsolidity/analysis/NameAndTypeResolver.cpp b/libsolidity/analysis/NameAndTypeResolver.cpp index ee05910c..260965ae 100644 --- a/libsolidity/analysis/NameAndTypeResolver.cpp +++ b/libsolidity/analysis/NameAndTypeResolver.cpp @@ -264,11 +264,11 @@ vector NameAndTypeResolver::cleanedDeclarations( solAssert(dynamic_cast(*it) || dynamic_cast(*it), "Found overloading involving something not a function or a variable"); - shared_ptr functionType {}; + unique_ptr functionType {}; if (FunctionDefinition const* functionDefinition = dynamic_cast(*it)) { - functionType = make_shared(*functionDefinition); + functionType = unique_ptr(new FunctionType(*functionDefinition)); for (auto parameter: functionType->parameterTypes() + functionType->returnParameterTypes()) if (!parameter) reportFatalDeclarationError(_identifier.location(), "Function type can not be used in this context"); @@ -276,7 +276,7 @@ vector NameAndTypeResolver::cleanedDeclarations( else { VariableDeclaration const* variableDeclaration = dynamic_cast(*it); - functionType = make_shared(*variableDeclaration); + functionType = unique_ptr(new FunctionType(*variableDeclaration)); } solAssert(functionType, "failed to determine the function type of the overloaded"); -- cgit From 9602318eab7affca78b8b5db018e9e646f40e84e Mon Sep 17 00:00:00 2001 From: Yoichi Hirai Date: Wed, 14 Dec 2016 14:18:11 +0100 Subject: test: allow multiple errors in modifier-function overriding --- test/libsolidity/SolidityNameAndTypeResolution.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/libsolidity/SolidityNameAndTypeResolution.cpp b/test/libsolidity/SolidityNameAndTypeResolution.cpp index 11ce9997..9f6ea2b3 100644 --- a/test/libsolidity/SolidityNameAndTypeResolution.cpp +++ b/test/libsolidity/SolidityNameAndTypeResolution.cpp @@ -1056,7 +1056,9 @@ BOOST_AUTO_TEST_CASE(modifier_overrides_function) contract A { modifier mod(uint a) { _; } } contract B is A { function mod(uint a) { } } )"; - CHECK_ERROR(text, DeclarationError, ""); + // Error: Identifier already declared. + // Error: Override changes modifier to function. + CHECK_ERROR_ALLOW_MULTI(text, DeclarationError, ""); } BOOST_AUTO_TEST_CASE(function_overrides_modifier) @@ -1065,7 +1067,9 @@ BOOST_AUTO_TEST_CASE(function_overrides_modifier) contract A { function mod(uint a) { } } contract B is A { modifier mod(uint a) { _; } } )"; - CHECK_ERROR(text, DeclarationError, ""); + // Error: Identifier already declared. + // Error: Override changes function to modifier. + CHECK_ERROR_ALLOW_MULTI(text, DeclarationError, ""); } BOOST_AUTO_TEST_CASE(modifier_returns_value) -- cgit From bd6e65a89c1e90a01740c61af84b17be7ade26a0 Mon Sep 17 00:00:00 2001 From: Yoichi Hirai Date: Tue, 3 Jan 2017 19:40:50 +0100 Subject: docs: document the resriction about a function and an event of the same name, and so on --- docs/contracts.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/contracts.rst b/docs/contracts.rst index e82b7495..edc42c3d 100644 --- a/docs/contracts.rst +++ b/docs/contracts.rst @@ -877,6 +877,13 @@ cannot be resolved. A simple rule to remember is to specify the base classes in the order from "most base-like" to "most derived". +Inheriting Different Kinds of Members of the Same Name +====================================================== + +When the inheritance results in a contract with a function and a modifier of the same name, it is considered as an error. +This error is produced also by an event and a modifier of the same name, and a function and an event of the same name. +As an exception, a state variable accessor can override a public function. + .. index:: ! contract;abstract, ! abstract contract ****************** -- cgit From e254a59bd29b7bf226197eaa836fc163be16ef09 Mon Sep 17 00:00:00 2001 From: Yoichi Hirai Date: Mon, 9 Jan 2017 18:32:02 +0100 Subject: Changelog: add a line about #1479 --- Changelog.md | 1 + 1 file changed, 1 insertion(+) diff --git a/Changelog.md b/Changelog.md index 8a005eed..81f08c9f 100644 --- a/Changelog.md +++ b/Changelog.md @@ -6,6 +6,7 @@ Features: BugFixes: * Type checker, code generator: enable access to events of base contracts' names. * Imports: ``import ".dir/a"`` is not a relative path. Relative paths begin with directory ``.`` or ``..``. + * Type checker, disallow inheritances of different kinds (e.g. a function and a modifier) of members of the same name ### 0.4.7 (2016-12-15) -- cgit From 4542f459f165502ed9537bb570de44640cdc4228 Mon Sep 17 00:00:00 2001 From: VoR0220 Date: Wed, 11 Jan 2017 11:45:14 -0600 Subject: added fix and a test for order independence of nested prefixing Signed-off-by: VoR0220 --- libsolidity/interface/CompilerStack.cpp | 4 ++-- test/libsolidity/Imports.cpp | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/libsolidity/interface/CompilerStack.cpp b/libsolidity/interface/CompilerStack.cpp index 1e571cae..e4c351ff 100644 --- a/libsolidity/interface/CompilerStack.cpp +++ b/libsolidity/interface/CompilerStack.cpp @@ -523,7 +523,7 @@ string CompilerStack::applyRemapping(string const& _path, string const& _context if (!isPrefixOf(context, _context)) continue; // Skip if we already have a closer prefix match. - if (prefix.length() < longestPrefix) + if (prefix.length() < longestPrefix && context.length() == longestContext) continue; // Skip if the prefix does not match. if (!isPrefixOf(prefix, _path)) @@ -534,7 +534,7 @@ string CompilerStack::applyRemapping(string const& _path, string const& _context bestMatchTarget = redir.target; } - string path = longestPrefixTarget; + string path = bestMatchTarget; path.append(_path.begin() + longestPrefix, _path.end()); return path; } diff --git a/test/libsolidity/Imports.cpp b/test/libsolidity/Imports.cpp index 7945f729..712f133a 100644 --- a/test/libsolidity/Imports.cpp +++ b/test/libsolidity/Imports.cpp @@ -183,6 +183,24 @@ BOOST_AUTO_TEST_CASE(context_dependent_remappings_ensure_default_and_module_pres BOOST_CHECK(c.compile()); } +BOOST_AUTO_TEST_CASE(context_dependent_remappings_order_independent) +{ + CompilerStack c; + c.setRemappings(vector{"a:x/y/z=d", "a/b:x=e"}); + c.addSource("a/main.sol", "import \"x/y/z/z.sol\"; contract Main is D {} pragma solidity >=0.0;"); + c.addSource("a/b/main.sol", "import \"x/y/z/z.sol\"; contract Main is E {} pragma solidity >=0.0;"); + c.addSource("x/y/z/z.sol", "contract D {} pragma solidity >=0.0;"); + c.addSource("e/y/z/z.sol", "contract E {} pragma solidity >=0.0;"); + BOOST_CHECK(c.compile()); + CompilerStack d; + d.setRemappings(vector{"a/b:x=e", "a:x/y/z=d"}); + d.addSource("a/main.sol", "import \"x/y/z/z.sol\"; contract Main is D {} pragma solidity >=0.0;"); + d.addSource("a/b/main.sol", "import \"x/y/z/z.sol\"; contract Main is E {} pragma solidity >=0.0;"); + d.addSource("x/y/z/z.sol", "contract D {} pragma solidity >=0.0;"); + d.addSource("e/y/z/z.sol", "contract E {} pragma solidity >=0.0;"); + BOOST_CHECK(d.compile()); +} + BOOST_AUTO_TEST_SUITE_END() } -- cgit From b6508ca992531154a572320bf8bb117e3b9294b9 Mon Sep 17 00:00:00 2001 From: VoR0220 Date: Wed, 11 Jan 2017 12:03:54 -0600 Subject: fixed Signed-off-by: VoR0220 --- libsolidity/interface/CompilerStack.cpp | 4 ++-- test/libsolidity/Imports.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libsolidity/interface/CompilerStack.cpp b/libsolidity/interface/CompilerStack.cpp index e4c351ff..a31df584 100644 --- a/libsolidity/interface/CompilerStack.cpp +++ b/libsolidity/interface/CompilerStack.cpp @@ -516,6 +516,7 @@ string CompilerStack::applyRemapping(string const& _path, string const& _context { string context = sanitizePath(redir.context); string prefix = sanitizePath(redir.prefix); + // Skip if current context is closer if (context.length() < longestContext) continue; @@ -531,9 +532,8 @@ string CompilerStack::applyRemapping(string const& _path, string const& _context longestContext = context.length(); longestPrefix = prefix.length(); - bestMatchTarget = redir.target; + bestMatchTarget = sanitizePath(redir.target); } - string path = bestMatchTarget; path.append(_path.begin() + longestPrefix, _path.end()); return path; diff --git a/test/libsolidity/Imports.cpp b/test/libsolidity/Imports.cpp index 712f133a..56895fdc 100644 --- a/test/libsolidity/Imports.cpp +++ b/test/libsolidity/Imports.cpp @@ -189,14 +189,14 @@ BOOST_AUTO_TEST_CASE(context_dependent_remappings_order_independent) c.setRemappings(vector{"a:x/y/z=d", "a/b:x=e"}); c.addSource("a/main.sol", "import \"x/y/z/z.sol\"; contract Main is D {} pragma solidity >=0.0;"); c.addSource("a/b/main.sol", "import \"x/y/z/z.sol\"; contract Main is E {} pragma solidity >=0.0;"); - c.addSource("x/y/z/z.sol", "contract D {} pragma solidity >=0.0;"); + c.addSource("d/z.sol", "contract D {} pragma solidity >=0.0;"); c.addSource("e/y/z/z.sol", "contract E {} pragma solidity >=0.0;"); BOOST_CHECK(c.compile()); CompilerStack d; d.setRemappings(vector{"a/b:x=e", "a:x/y/z=d"}); d.addSource("a/main.sol", "import \"x/y/z/z.sol\"; contract Main is D {} pragma solidity >=0.0;"); d.addSource("a/b/main.sol", "import \"x/y/z/z.sol\"; contract Main is E {} pragma solidity >=0.0;"); - d.addSource("x/y/z/z.sol", "contract D {} pragma solidity >=0.0;"); + d.addSource("d/z.sol", "contract D {} pragma solidity >=0.0;"); d.addSource("e/y/z/z.sol", "contract E {} pragma solidity >=0.0;"); BOOST_CHECK(d.compile()); } -- cgit From 5e29b4cde228b7aab0ae6c22ef4190d26cb58ab8 Mon Sep 17 00:00:00 2001 From: Anders Brownworth Date: Wed, 11 Jan 2017 13:13:11 -0500 Subject: Re-word somewhat bumpy sentence to be more clear. --- docs/introduction-to-smart-contracts.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/introduction-to-smart-contracts.rst b/docs/introduction-to-smart-contracts.rst index aee1e03b..4c134abc 100644 --- a/docs/introduction-to-smart-contracts.rst +++ b/docs/introduction-to-smart-contracts.rst @@ -283,8 +283,8 @@ determined at the time the contract is created (it is derived from the creator address and the number of transactions sent from that address, the so-called "nonce"). -Apart from the fact whether an account stores code or not, -the EVM treats the two types equally, though. +Regardless of whether or not the account stores code, the two types are +treated equally by the EVM. Every account has a persistent key-value store mapping 256-bit words to 256-bit words called **storage**. -- cgit From eda147f47bb5a0dfbed50aa2c2224daebafa76a8 Mon Sep 17 00:00:00 2001 From: Yoichi Hirai Date: Tue, 10 Jan 2017 16:26:13 +0100 Subject: ast: add Declaration::functionType() --- libsolidity/analysis/NameAndTypeResolver.cpp | 21 ++++------ libsolidity/ast/AST.cpp | 61 ++++++++++++++++++++++++++++ libsolidity/ast/AST.h | 12 ++++++ 3 files changed, 80 insertions(+), 14 deletions(-) diff --git a/libsolidity/analysis/NameAndTypeResolver.cpp b/libsolidity/analysis/NameAndTypeResolver.cpp index 260965ae..47a9df1b 100644 --- a/libsolidity/analysis/NameAndTypeResolver.cpp +++ b/libsolidity/analysis/NameAndTypeResolver.cpp @@ -264,22 +264,15 @@ vector NameAndTypeResolver::cleanedDeclarations( solAssert(dynamic_cast(*it) || dynamic_cast(*it), "Found overloading involving something not a function or a variable"); - unique_ptr functionType {}; - - if (FunctionDefinition const* functionDefinition = dynamic_cast(*it)) - { - functionType = unique_ptr(new FunctionType(*functionDefinition)); - for (auto parameter: functionType->parameterTypes() + functionType->returnParameterTypes()) - if (!parameter) - reportFatalDeclarationError(_identifier.location(), "Function type can not be used in this context"); - } - else - { - VariableDeclaration const* variableDeclaration = dynamic_cast(*it); - functionType = unique_ptr(new FunctionType(*variableDeclaration)); - } + shared_ptr functionType { (*it)->functionType(false) }; + if (!functionType) + functionType = (*it)->functionType(true); solAssert(functionType, "failed to determine the function type of the overloaded"); + for (auto parameter: functionType->parameterTypes() + functionType->returnParameterTypes()) + if (!parameter) + reportFatalDeclarationError(_identifier.location(), "Function type can not be used in this context"); + if (uniqueFunctions.end() == find_if( uniqueFunctions.begin(), uniqueFunctions.end(), diff --git a/libsolidity/ast/AST.cpp b/libsolidity/ast/AST.cpp index 78d8949c..ac92eaa3 100644 --- a/libsolidity/ast/AST.cpp +++ b/libsolidity/ast/AST.cpp @@ -274,6 +274,45 @@ TypeDeclarationAnnotation& EnumDefinition::annotation() const return static_cast(*m_annotation); } +shared_ptr FunctionDefinition::functionType(bool _internal) const +{ + if (_internal) + { + switch (visibility()) + { + case Declaration::Visibility::Default: + solAssert(false, "visibility() should not return Default"); + case Declaration::Visibility::Private: + case Declaration::Visibility::Internal: + case Declaration::Visibility::Public: + return make_shared(*this, _internal); + case Declaration::Visibility::External: + return {}; + default: + solAssert(false, "visibility() should not return a Visibility"); + } + } + else + { + switch (visibility()) + { + case Declaration::Visibility::Default: + solAssert(false, "visibility() should not return Default"); + case Declaration::Visibility::Private: + case Declaration::Visibility::Internal: + return {}; + case Declaration::Visibility::Public: + case Declaration::Visibility::External: + return make_shared(*this, _internal); + default: + solAssert(false, "visibility() should not return a Visibility"); + } + } + + // To make the compiler happy + return {}; +} + TypePointer FunctionDefinition::type() const { return make_shared(*this); @@ -365,6 +404,28 @@ TypePointer VariableDeclaration::type() const return annotation().type; } +shared_ptr VariableDeclaration::functionType(bool _internal) const +{ + if (_internal) + return {}; + switch (visibility()) + { + case Declaration::Visibility::Default: + solAssert(false, "visibility() should not return Default"); + case Declaration::Visibility::Private: + case Declaration::Visibility::Internal: + return {}; + case Declaration::Visibility::Public: + case Declaration::Visibility::External: + return make_shared(*this); + default: + solAssert(false, "visibility() should not return a Visibility"); + } + + // To make the compiler happy + return {}; +} + VariableDeclarationAnnotation& VariableDeclaration::annotation() const { if (!m_annotation) diff --git a/libsolidity/ast/AST.h b/libsolidity/ast/AST.h index ab4be1ea..088d68db 100644 --- a/libsolidity/ast/AST.h +++ b/libsolidity/ast/AST.h @@ -171,6 +171,10 @@ public: /// This can only be called once types of variable declarations have already been resolved. virtual TypePointer type() const = 0; + /// @param _internal false indicates external interface is concerned, true indicates internal interface is concerned. + /// @returns null when it is not accessible as a function. + virtual std::shared_ptr functionType(bool /*_internal*/) const { return {}; } + protected: virtual Visibility defaultVisibility() const { return Visibility::Public; } @@ -581,6 +585,10 @@ public: virtual TypePointer type() const override; + /// @param _internal false indicates external interface is concerned, true indicates internal interface is concerned. + /// @returns null when it is not accessible as a function. + virtual std::shared_ptr functionType(bool /*_internal*/) const override; + virtual FunctionDefinitionAnnotation& annotation() const override; private: @@ -643,6 +651,10 @@ public: virtual TypePointer type() const override; + /// @param _internal false indicates external interface is concerned, true indicates internal interface is concerned. + /// @returns null when it is not accessible as a function. + virtual std::shared_ptr functionType(bool /*_internal*/) const override; + virtual VariableDeclarationAnnotation& annotation() const override; protected: -- cgit From 7fea4b7360d1db01a33fd2c5a90e966880232e47 Mon Sep 17 00:00:00 2001 From: Yoichi Hirai Date: Tue, 10 Jan 2017 16:34:55 +0100 Subject: analysis: use Declaration::functionType() in another location --- libsolidity/analysis/NameAndTypeResolver.cpp | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/libsolidity/analysis/NameAndTypeResolver.cpp b/libsolidity/analysis/NameAndTypeResolver.cpp index 47a9df1b..08323243 100644 --- a/libsolidity/analysis/NameAndTypeResolver.cpp +++ b/libsolidity/analysis/NameAndTypeResolver.cpp @@ -278,17 +278,10 @@ vector NameAndTypeResolver::cleanedDeclarations( uniqueFunctions.end(), [&](Declaration const* d) { - if (FunctionDefinition const* functionDefinition = dynamic_cast(d)) - { - FunctionType const newFunctionType(*functionDefinition); - return functionType->hasEqualArgumentTypes(newFunctionType); - } - else if (VariableDeclaration const* variableDeclaration = dynamic_cast(d)) - { - FunctionType const newFunctionType(*variableDeclaration); - return functionType->hasEqualArgumentTypes(newFunctionType); - } - return false; // to make compiler happy + shared_ptr newFunctionType { d->functionType(false) }; + if (!newFunctionType) + newFunctionType = d->functionType(true); + return newFunctionType && functionType->hasEqualArgumentTypes(*newFunctionType); } )) uniqueFunctions.push_back(*it); -- cgit From abc24420a7837557e5160db8af83ea3be5a371c3 Mon Sep 17 00:00:00 2001 From: Yoichi Hirai Date: Tue, 10 Jan 2017 18:55:36 +0100 Subject: ast: events have FunctionType too --- libsolidity/ast/AST.cpp | 18 +++++++++++++----- libsolidity/ast/AST.h | 7 ++++--- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/libsolidity/ast/AST.cpp b/libsolidity/ast/AST.cpp index ac92eaa3..6f7a64dc 100644 --- a/libsolidity/ast/AST.cpp +++ b/libsolidity/ast/AST.cpp @@ -274,7 +274,7 @@ TypeDeclarationAnnotation& EnumDefinition::annotation() const return static_cast(*m_annotation); } -shared_ptr FunctionDefinition::functionType(bool _internal) const +shared_ptr FunctionDefinition::functionType(bool _internal) const { if (_internal) { @@ -285,7 +285,7 @@ shared_ptr FunctionDefinition::functionType(bool _internal) case Declaration::Visibility::Private: case Declaration::Visibility::Internal: case Declaration::Visibility::Public: - return make_shared(*this, _internal); + return make_shared(*this, _internal); case Declaration::Visibility::External: return {}; default: @@ -303,7 +303,7 @@ shared_ptr FunctionDefinition::functionType(bool _internal) return {}; case Declaration::Visibility::Public: case Declaration::Visibility::External: - return make_shared(*this, _internal); + return make_shared(*this, _internal); default: solAssert(false, "visibility() should not return a Visibility"); } @@ -347,6 +347,14 @@ TypePointer EventDefinition::type() const return make_shared(*this); } +std::shared_ptr EventDefinition::functionType(bool _internal) const +{ + if (_internal) + return make_shared(*this); + else + return {}; +} + EventDefinitionAnnotation& EventDefinition::annotation() const { if (!m_annotation) @@ -404,7 +412,7 @@ TypePointer VariableDeclaration::type() const return annotation().type; } -shared_ptr VariableDeclaration::functionType(bool _internal) const +shared_ptr VariableDeclaration::functionType(bool _internal) const { if (_internal) return {}; @@ -417,7 +425,7 @@ shared_ptr VariableDeclaration::functionType(bool _internal) return {}; case Declaration::Visibility::Public: case Declaration::Visibility::External: - return make_shared(*this); + return make_shared(*this); default: solAssert(false, "visibility() should not return a Visibility"); } diff --git a/libsolidity/ast/AST.h b/libsolidity/ast/AST.h index 088d68db..2d092408 100644 --- a/libsolidity/ast/AST.h +++ b/libsolidity/ast/AST.h @@ -173,7 +173,7 @@ public: /// @param _internal false indicates external interface is concerned, true indicates internal interface is concerned. /// @returns null when it is not accessible as a function. - virtual std::shared_ptr functionType(bool /*_internal*/) const { return {}; } + virtual std::shared_ptr functionType(bool /*_internal*/) const { return {}; } protected: virtual Visibility defaultVisibility() const { return Visibility::Public; } @@ -587,7 +587,7 @@ public: /// @param _internal false indicates external interface is concerned, true indicates internal interface is concerned. /// @returns null when it is not accessible as a function. - virtual std::shared_ptr functionType(bool /*_internal*/) const override; + virtual std::shared_ptr functionType(bool /*_internal*/) const override; virtual FunctionDefinitionAnnotation& annotation() const override; @@ -653,7 +653,7 @@ public: /// @param _internal false indicates external interface is concerned, true indicates internal interface is concerned. /// @returns null when it is not accessible as a function. - virtual std::shared_ptr functionType(bool /*_internal*/) const override; + virtual std::shared_ptr functionType(bool /*_internal*/) const override; virtual VariableDeclarationAnnotation& annotation() const override; @@ -752,6 +752,7 @@ public: bool isAnonymous() const { return m_anonymous; } virtual TypePointer type() const override; + virtual std::shared_ptr functionType(bool /*_internal*/) const override; virtual EventDefinitionAnnotation& annotation() const override; -- cgit From ee0bf07487068a46af0d286208a0224a58721527 Mon Sep 17 00:00:00 2001 From: Paweł Bylica Date: Thu, 12 Jan 2017 14:14:19 +0100 Subject: Travis CI: Install latest CMake --- .travis.yml | 2 ++ scripts/install_cmake.sh | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100755 scripts/install_cmake.sh diff --git a/.travis.yml b/.travis.yml index a50674c1..017f1a61 100644 --- a/.travis.yml +++ b/.travis.yml @@ -140,9 +140,11 @@ cache: directories: - boost_1_57_0 - build + - $HOME/.local install: - test $TRAVIS_INSTALL_DEPS != On || ./scripts/install_deps.sh + - test "$TRAVIS_OS_NAME" != "linux" || ./scripts/install_cmake.sh - echo -n "$TRAVIS_COMMIT" > commit_hash.txt before_script: - test $TRAVIS_EMSCRIPTEN != On || ./scripts/build_emscripten.sh diff --git a/scripts/install_cmake.sh b/scripts/install_cmake.sh new file mode 100755 index 00000000..00d013b9 --- /dev/null +++ b/scripts/install_cmake.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env sh + +# This script downloads the CMake binary and installs it in ~/.local directory +# (the cmake executable will be in ~/.local/bin). +# This is mostly suitable for CIs, not end users. + +set -e + +VERSION=3.7.1 +PREFIX=~/.local + +OS=$(uname -s) +case $OS in +Linux) SHA256=7b4b7a1d9f314f45722899c0521c261e4bfab4a6b532609e37fef391da6bade2;; +Darwin) SHA256=1851d1448964893fdc5a8c05863326119f397a3790e0c84c40b83499c7960267;; +esac + + +BIN=$PREFIX/bin + +if test -f $BIN/cmake && ($BIN/cmake --version | grep -q "$VERSION"); then + echo "CMake $VERSION already installed in $BIN" +else + FILE=cmake-$VERSION-$OS-x86_64.tar.gz + URL=https://cmake.org/files/v3.7/$FILE + ERROR=0 + TMPFILE=$(mktemp --tmpdir cmake-$VERSION-$OS-x86_64.XXXXXXXX.tar.gz) + echo "Downloading CMake ($URL)..." + wget "$URL" -O "$TMPFILE" -nv + if ! (shasum -a256 "$TMPFILE" | grep -q "$SHA256"); then + echo "Checksum mismatch ($TMPFILE)" + exit 1 + fi + mkdir -p "$PREFIX" + tar xzf "$TMPFILE" -C "$PREFIX" --strip 1 + rm $TMPFILE +fi -- cgit From afad40ac5a259cf60cd2f5c8b31495f3f64f3e8e Mon Sep 17 00:00:00 2001 From: chriseth Date: Fri, 6 Jan 2017 11:33:08 +0100 Subject: Optimise AssemblyItem::m_data. --- cmake/EthCompilerSettings.cmake | 2 +- libevmasm/Assembly.cpp | 2 +- libevmasm/AssemblyItem.cpp | 8 +++---- libevmasm/AssemblyItem.h | 46 ++++++++++++++++++++++++++++++----------- libevmasm/ExpressionClasses.cpp | 23 +++++++++++++++++---- 5 files changed, 59 insertions(+), 22 deletions(-) diff --git a/cmake/EthCompilerSettings.cmake b/cmake/EthCompilerSettings.cmake index c734423b..65742dfb 100644 --- a/cmake/EthCompilerSettings.cmake +++ b/cmake/EthCompilerSettings.cmake @@ -71,7 +71,7 @@ if (("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU") OR ("${CMAKE_CXX_COMPILER_ID}" MA add_compile_options(-fPIC) # Configuration-specific compiler settings. - set(CMAKE_CXX_FLAGS_DEBUG "-Og -g -DETH_DEBUG") + set(CMAKE_CXX_FLAGS_DEBUG " -g -DETH_DEBUG") set(CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG") set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g") diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index f50a38a6..845abfd4 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -416,7 +416,7 @@ LinkerObject const& Assembly::assemble() const switch (i.type()) { case Operation: - ret.bytecode.push_back((byte)i.data()); + ret.bytecode.push_back((byte)i.instruction()); break; case PushString: { diff --git a/libevmasm/AssemblyItem.cpp b/libevmasm/AssemblyItem.cpp index b790e094..6c7d5425 100644 --- a/libevmasm/AssemblyItem.cpp +++ b/libevmasm/AssemblyItem.cpp @@ -29,19 +29,19 @@ using namespace dev::eth; AssemblyItem AssemblyItem::toSubAssemblyTag(size_t _subId) const { - assertThrow(m_data < (u256(1) << 64), Exception, "Tag already has subassembly set."); + assertThrow(data() < (u256(1) << 64), Exception, "Tag already has subassembly set."); assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); AssemblyItem r = *this; r.m_type = PushTag; - r.setPushTagSubIdAndTag(_subId, size_t(m_data)); + r.setPushTagSubIdAndTag(_subId, size_t(data())); return r; } pair AssemblyItem::splitForeignPushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); - return make_pair(size_t(m_data / (u256(1) << 64)) - 1, size_t(m_data)); + return make_pair(size_t((data()) / (u256(1) << 64)) - 1, size_t(data())); } void AssemblyItem::setPushTagSubIdAndTag(size_t _subId, size_t _tag) @@ -60,7 +60,7 @@ unsigned AssemblyItem::bytesRequired(unsigned _addressLength) const case PushString: return 33; case Push: - return 1 + max(1, dev::bytesRequired(m_data)); + return 1 + max(1, dev::bytesRequired(data())); case PushSubSize: case PushProgramSize: return 4; // worst case: a 16MB program diff --git a/libevmasm/AssemblyItem.h b/libevmasm/AssemblyItem.h index cddfb17c..7fcaa145 100644 --- a/libevmasm/AssemblyItem.h +++ b/libevmasm/AssemblyItem.h @@ -59,16 +59,22 @@ public: AssemblyItem(u256 _push, SourceLocation const& _location = SourceLocation()): AssemblyItem(Push, _push, _location) { } AssemblyItem(solidity::Instruction _i, SourceLocation const& _location = SourceLocation()): - AssemblyItem(Operation, byte(_i), _location) { } + m_type(Operation), + m_instruction(_i), + m_location(_location) + {} AssemblyItem(AssemblyItemType _type, u256 _data = 0, SourceLocation const& _location = SourceLocation()): m_type(_type), - m_data(_data), m_location(_location) { + if (m_type == Operation) + m_instruction = Instruction(byte(_data)); + else + m_data = std::make_shared(_data); } - AssemblyItem tag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(Tag, m_data); } - AssemblyItem pushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(PushTag, m_data); } + AssemblyItem tag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(Tag, data()); } + AssemblyItem pushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(PushTag, data()); } /// Converts the tag to a subassembly tag. This has to be called in order to move a tag across assemblies. /// @param _subId the identifier of the subassembly the tag is taken from. AssemblyItem toSubAssemblyTag(size_t _subId) const; @@ -79,18 +85,34 @@ public: void setPushTagSubIdAndTag(size_t _subId, size_t _tag); AssemblyItemType type() const { return m_type; } - u256 const& data() const { return m_data; } - void setType(AssemblyItemType const _type) { m_type = _type; } - void setData(u256 const& _data) { m_data = _data; } + u256 const& data() const { assertThrow(m_type != Operation, Exception, ""); return *m_data; } + //void setType(AssemblyItemType const _type) { m_type = _type; } + void setData(u256 const& _data) { assertThrow(m_type != Operation, Exception, ""); m_data = std::make_shared(_data); } /// @returns the instruction of this item (only valid if type() == Operation) - Instruction instruction() const { return Instruction(byte(m_data)); } + Instruction instruction() const { assertThrow(m_type == Operation, Exception, ""); return m_instruction; } /// @returns true if the type and data of the items are equal. - bool operator==(AssemblyItem const& _other) const { return m_type == _other.m_type && m_data == _other.m_data; } + bool operator==(AssemblyItem const& _other) const + { + if (type() != _other.type()) + return false; + if (type() == Operation) + return instruction() == _other.instruction(); + else + return data() == _other.data(); + } bool operator!=(AssemblyItem const& _other) const { return !operator==(_other); } /// Less-than operator compatible with operator==. - bool operator<(AssemblyItem const& _other) const { return std::tie(m_type, m_data) < std::tie(_other.m_type, _other.m_data); } + bool operator<(AssemblyItem const& _other) const + { + if (type() != _other.type()) + return type() < _other.type(); + else if (type() == Operation) + return instruction() < _other.instruction(); + else + return data() < _other.data(); + } /// @returns an upper bound for the number of bytes required by this item, assuming that /// the value of a jump tag takes @a _addressLength bytes. @@ -100,7 +122,6 @@ public: /// @returns true if the assembly item can be used in a functional context. bool canBeFunctional() const; - bool match(AssemblyItem const& _i) const { return _i.m_type == UndefinedItem || (m_type == _i.m_type && (m_type != Operation || m_data == _i.m_data)); } void setLocation(SourceLocation const& _location) { m_location = _location; } SourceLocation const& location() const { return m_location; } @@ -115,7 +136,8 @@ public: private: AssemblyItemType m_type; - u256 m_data; + Instruction m_instruction; ///< Only valid if m_type == Operation + std::shared_ptr m_data; ///< Only valid if m_type != Operation SourceLocation m_location; JumpType m_jumpType = JumpType::Ordinary; /// Pushed value for operations with data to be determined during assembly stage, diff --git a/libevmasm/ExpressionClasses.cpp b/libevmasm/ExpressionClasses.cpp index d5ccd7e3..b903ae04 100644 --- a/libevmasm/ExpressionClasses.cpp +++ b/libevmasm/ExpressionClasses.cpp @@ -40,8 +40,18 @@ bool ExpressionClasses::Expression::operator<(ExpressionClasses::Expression cons assertThrow(!!item && !!_other.item, OptimizerException, ""); auto type = item->type(); auto otherType = _other.item->type(); - return std::tie(type, item->data(), arguments, sequenceNumber) < - std::tie(otherType, _other.item->data(), _other.arguments, _other.sequenceNumber); + if (type != otherType) + return type < otherType; + else if (type == Operation) + { + auto instr = item->instruction(); + auto otherInstr = _other.item->instruction(); + return std::tie(instr, arguments, sequenceNumber) < + std::tie(otherInstr, _other.arguments, _other.sequenceNumber); + } + else + return std::tie(item->data(), arguments, sequenceNumber) < + std::tie(_other.item->data(), _other.arguments, _other.sequenceNumber); } ExpressionClasses::Id ExpressionClasses::find( @@ -479,8 +489,13 @@ bool Pattern::matchesBaseItem(AssemblyItem const* _item) const return false; if (m_type != _item->type()) return false; - if (m_requireDataMatch && m_data != _item->data()) - return false; + if (m_requireDataMatch) + { + if (m_type == Operation) + return m_data == u256(byte(_item->instruction())); + else + return m_data == _item->data(); + } return true; } -- cgit From 652d8dab191fcd50ce59b049177ae3281255f309 Mon Sep 17 00:00:00 2001 From: chriseth Date: Fri, 6 Jan 2017 11:25:02 +0100 Subject: Optimize pattern matcher. --- libevmasm/ExpressionClasses.cpp | 32 ++++++++++++++++++-------------- libevmasm/ExpressionClasses.h | 6 ++++-- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/libevmasm/ExpressionClasses.cpp b/libevmasm/ExpressionClasses.cpp index b903ae04..fccac1b0 100644 --- a/libevmasm/ExpressionClasses.cpp +++ b/libevmasm/ExpressionClasses.cpp @@ -185,7 +185,7 @@ class Rules: public boost::noncopyable public: Rules(); void resetMatchGroups() { m_matchGroups.clear(); } - vector>> rules() const { return m_rules; } + vector>> const& rules() const { return m_rules; } private: using Expression = ExpressionClasses::Expression; @@ -417,8 +417,7 @@ ExpressionClasses::Id ExpressionClasses::rebuildExpression(ExpressionTemplate co Pattern::Pattern(Instruction _instruction, std::vector const& _arguments): m_type(Operation), - m_requireDataMatch(true), - m_data(_instruction), + m_instruction(_instruction), m_arguments(_arguments) { } @@ -449,7 +448,10 @@ bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes AssemblyItem Pattern::toAssemblyItem(SourceLocation const& _location) const { - return AssemblyItem(m_type, m_data, _location); + if (m_type == Operation) + return AssemblyItem(m_instruction, _location); + else + return AssemblyItem(m_type, data(), _location); } string Pattern::toString() const @@ -458,16 +460,16 @@ string Pattern::toString() const switch (m_type) { case Operation: - s << instructionInfo(Instruction(unsigned(m_data))).name; + s << instructionInfo(m_instruction).name; break; case Push: - s << "PUSH " << hex << m_data; + s << "PUSH " << hex << data(); break; case UndefinedItem: s << "ANY"; break; default: - s << "t=" << dec << m_type << " d=" << hex << m_data; + s << "t=" << dec << m_type << " d=" << hex << data(); break; } if (!m_requireDataMatch) @@ -489,13 +491,10 @@ bool Pattern::matchesBaseItem(AssemblyItem const* _item) const return false; if (m_type != _item->type()) return false; - if (m_requireDataMatch) - { - if (m_type == Operation) - return m_data == u256(byte(_item->instruction())); - else - return m_data == _item->data(); - } + else if (m_type == Operation) + return m_instruction == _item->instruction(); + else if (m_requireDataMatch) + return data() == _item->data(); return true; } @@ -507,6 +506,11 @@ Pattern::Expression const& Pattern::matchGroupValue() const return *(*m_matchGroups)[m_matchGroup]; } +u256 const& Pattern::data() const +{ + assertThrow(m_data, OptimizerException, ""); + return *m_data; +} ExpressionTemplate::ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location) { diff --git a/libevmasm/ExpressionClasses.h b/libevmasm/ExpressionClasses.h index 11a698dd..c91b227f 100644 --- a/libevmasm/ExpressionClasses.h +++ b/libevmasm/ExpressionClasses.h @@ -135,7 +135,7 @@ public: // Matches a specific constant value. Pattern(unsigned _value): Pattern(u256(_value)) {} // Matches a specific constant value. - Pattern(u256 const& _value): m_type(Push), m_requireDataMatch(true), m_data(_value) {} + Pattern(u256 const& _value): m_type(Push), m_requireDataMatch(true), m_data(std::make_shared(_value)) {} // Matches a specific assembly item type or anything if not given. Pattern(AssemblyItemType _type = UndefinedItem): m_type(_type) {} // Matches a given instruction with given arguments @@ -160,10 +160,12 @@ public: private: bool matchesBaseItem(AssemblyItem const* _item) const; Expression const& matchGroupValue() const; + u256 const& data() const; AssemblyItemType m_type; bool m_requireDataMatch = false; - u256 m_data = 0; + Instruction m_instruction; ///< Only valid if m_type is Operation + std::shared_ptr m_data; ///< Only valid if m_type is not Operation std::vector m_arguments; unsigned m_matchGroup = 0; std::map* m_matchGroups = nullptr; -- cgit From a3b01eca27b19311a05b9c7b7e2ed8eb51b0e51c Mon Sep 17 00:00:00 2001 From: chriseth Date: Fri, 6 Jan 2017 15:17:16 +0100 Subject: Do not evaluate strings if assertion succeeds. --- libdevcore/Assertions.h | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/libdevcore/Assertions.h b/libdevcore/Assertions.h index 05e0b0e5..0fb5837c 100644 --- a/libdevcore/Assertions.h +++ b/libdevcore/Assertions.h @@ -73,27 +73,19 @@ inline bool assertEqualAux(A const& _a, B const& _b, char const* _aStr, char con /// Use it as assertThrow(1 == 1, ExceptionType, "Mathematics is wrong."); /// Do NOT supply an exception object as the second parameter. #define assertThrow(_condition, _ExceptionType, _description) \ - ::dev::assertThrowAux<_ExceptionType>(!!(_condition), _description, __LINE__, __FILE__, ETH_FUNC) + do \ + { \ + if (!(_condition)) \ + ::boost::throw_exception( \ + _ExceptionType() << \ + ::dev::errinfo_comment(_description) << \ + ::boost::throw_function(ETH_FUNC) << \ + ::boost::throw_file(__FILE__) << \ + ::boost::throw_line(__LINE__) \ + ); \ + } \ + while (false) using errinfo_comment = boost::error_info; -template -inline void assertThrowAux( - bool _condition, - ::std::string const& _errorDescription, - unsigned _line, - char const* _file, - char const* _function -) -{ - if (!_condition) - ::boost::throw_exception( - _ExceptionType() << - ::dev::errinfo_comment(_errorDescription) << - ::boost::throw_function(_function) << - ::boost::throw_file(_file) << - ::boost::throw_line(_line) - ); -} - } -- cgit From f1a4976ce65bd4730f72b0805635e818ad6334f7 Mon Sep 17 00:00:00 2001 From: chriseth Date: Fri, 6 Jan 2017 16:16:11 +0100 Subject: Optimise simplification rules (sort into bins). --- libevmasm/ExpressionClasses.cpp | 338 ++--------------------------------- libevmasm/ExpressionClasses.h | 67 ------- libevmasm/SimplificationRules.cpp | 363 ++++++++++++++++++++++++++++++++++++++ libevmasm/SimplificationRules.h | 140 +++++++++++++++ 4 files changed, 514 insertions(+), 394 deletions(-) create mode 100644 libevmasm/SimplificationRules.cpp create mode 100644 libevmasm/SimplificationRules.h diff --git a/libevmasm/ExpressionClasses.cpp b/libevmasm/ExpressionClasses.cpp index fccac1b0..fc283b0b 100644 --- a/libevmasm/ExpressionClasses.cpp +++ b/libevmasm/ExpressionClasses.cpp @@ -29,6 +29,7 @@ #include #include #include +#include using namespace std; using namespace dev; @@ -180,191 +181,6 @@ string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) const return str.str(); } -class Rules: public boost::noncopyable -{ -public: - Rules(); - void resetMatchGroups() { m_matchGroups.clear(); } - vector>> const& rules() const { return m_rules; } - -private: - using Expression = ExpressionClasses::Expression; - map m_matchGroups; - vector>> m_rules; -}; - -template S divWorkaround(S const& _a, S const& _b) -{ - return (S)(bigint(_a) / bigint(_b)); -} - -template S modWorkaround(S const& _a, S const& _b) -{ - return (S)(bigint(_a) % bigint(_b)); -} - -Rules::Rules() -{ - // Multiple occurences of one of these inside one rule must match the same equivalence class. - // Constants. - Pattern A(Push); - Pattern B(Push); - Pattern C(Push); - // Anything. - Pattern X; - Pattern Y; - Pattern Z; - A.setMatchGroup(1, m_matchGroups); - B.setMatchGroup(2, m_matchGroups); - C.setMatchGroup(3, m_matchGroups); - X.setMatchGroup(4, m_matchGroups); - Y.setMatchGroup(5, m_matchGroups); - Z.setMatchGroup(6, m_matchGroups); - - m_rules = vector>>{ - // arithmetics on constants - {{Instruction::ADD, {A, B}}, [=]{ return A.d() + B.d(); }}, - {{Instruction::MUL, {A, B}}, [=]{ return A.d() * B.d(); }}, - {{Instruction::SUB, {A, B}}, [=]{ return A.d() - B.d(); }}, - {{Instruction::DIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : divWorkaround(A.d(), B.d()); }}, - {{Instruction::SDIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(divWorkaround(u2s(A.d()), u2s(B.d()))); }}, - {{Instruction::MOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : modWorkaround(A.d(), B.d()); }}, - {{Instruction::SMOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(modWorkaround(u2s(A.d()), u2s(B.d()))); }}, - {{Instruction::EXP, {A, B}}, [=]{ return u256(boost::multiprecision::powm(bigint(A.d()), bigint(B.d()), bigint(1) << 256)); }}, - {{Instruction::NOT, {A}}, [=]{ return ~A.d(); }}, - {{Instruction::LT, {A, B}}, [=]() { return A.d() < B.d() ? u256(1) : 0; }}, - {{Instruction::GT, {A, B}}, [=]() -> u256 { return A.d() > B.d() ? 1 : 0; }}, - {{Instruction::SLT, {A, B}}, [=]() -> u256 { return u2s(A.d()) < u2s(B.d()) ? 1 : 0; }}, - {{Instruction::SGT, {A, B}}, [=]() -> u256 { return u2s(A.d()) > u2s(B.d()) ? 1 : 0; }}, - {{Instruction::EQ, {A, B}}, [=]() -> u256 { return A.d() == B.d() ? 1 : 0; }}, - {{Instruction::ISZERO, {A}}, [=]() -> u256 { return A.d() == 0 ? 1 : 0; }}, - {{Instruction::AND, {A, B}}, [=]{ return A.d() & B.d(); }}, - {{Instruction::OR, {A, B}}, [=]{ return A.d() | B.d(); }}, - {{Instruction::XOR, {A, B}}, [=]{ return A.d() ^ B.d(); }}, - {{Instruction::BYTE, {A, B}}, [=]{ return A.d() >= 32 ? 0 : (B.d() >> unsigned(8 * (31 - A.d()))) & 0xff; }}, - {{Instruction::ADDMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) + bigint(B.d())) % C.d()); }}, - {{Instruction::MULMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) * bigint(B.d())) % C.d()); }}, - {{Instruction::MULMOD, {A, B, C}}, [=]{ return A.d() * B.d(); }}, - {{Instruction::SIGNEXTEND, {A, B}}, [=]() -> u256 { - if (A.d() >= 31) - return B.d(); - unsigned testBit = unsigned(A.d()) * 8 + 7; - u256 mask = (u256(1) << testBit) - 1; - return u256(boost::multiprecision::bit_test(B.d(), testBit) ? B.d() | ~mask : B.d() & mask); - }}, - - // invariants involving known constants - {{Instruction::ADD, {X, 0}}, [=]{ return X; }}, - {{Instruction::SUB, {X, 0}}, [=]{ return X; }}, - {{Instruction::MUL, {X, 1}}, [=]{ return X; }}, - {{Instruction::DIV, {X, 1}}, [=]{ return X; }}, - {{Instruction::SDIV, {X, 1}}, [=]{ return X; }}, - {{Instruction::OR, {X, 0}}, [=]{ return X; }}, - {{Instruction::XOR, {X, 0}}, [=]{ return X; }}, - {{Instruction::AND, {X, ~u256(0)}}, [=]{ return X; }}, - {{Instruction::AND, {X, 0}}, [=]{ return u256(0); }}, - {{Instruction::MUL, {X, 0}}, [=]{ return u256(0); }}, - {{Instruction::DIV, {X, 0}}, [=]{ return u256(0); }}, - {{Instruction::DIV, {0, X}}, [=]{ return u256(0); }}, - {{Instruction::MOD, {X, 0}}, [=]{ return u256(0); }}, - {{Instruction::MOD, {0, X}}, [=]{ return u256(0); }}, - {{Instruction::OR, {X, ~u256(0)}}, [=]{ return ~u256(0); }}, - {{Instruction::EQ, {X, 0}}, [=]() -> Pattern { return {Instruction::ISZERO, {X}}; } }, - // operations involving an expression and itself - {{Instruction::AND, {X, X}}, [=]{ return X; }}, - {{Instruction::OR, {X, X}}, [=]{ return X; }}, - {{Instruction::XOR, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::SUB, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::EQ, {X, X}}, [=]{ return u256(1); }}, - {{Instruction::LT, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::SLT, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::GT, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::SGT, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::MOD, {X, X}}, [=]{ return u256(0); }}, - - {{Instruction::NOT, {{Instruction::NOT, {X}}}}, [=]{ return X; }}, - {{Instruction::XOR, {{{X}, {Instruction::XOR, {X, Y}}}}}, [=]{ return Y; }}, - {{Instruction::OR, {{{X}, {Instruction::AND, {X, Y}}}}}, [=]{ return X; }}, - {{Instruction::AND, {{{X}, {Instruction::OR, {X, Y}}}}}, [=]{ return X; }}, - {{Instruction::AND, {{{X}, {Instruction::NOT, {X}}}}}, [=]{ return u256(0); }}, - {{Instruction::OR, {{{X}, {Instruction::NOT, {X}}}}}, [=]{ return ~u256(0); }}, - }; - // Double negation of opcodes with binary result - for (auto const& op: vector{ - Instruction::EQ, - Instruction::LT, - Instruction::SLT, - Instruction::GT, - Instruction::SGT - }) - m_rules.push_back({ - {Instruction::ISZERO, {{Instruction::ISZERO, {{op, {X, Y}}}}}}, - [=]() -> Pattern { return {op, {X, Y}}; } - }); - m_rules.push_back({ - {Instruction::ISZERO, {{Instruction::ISZERO, {{Instruction::ISZERO, {X}}}}}}, - [=]() -> Pattern { return {Instruction::ISZERO, {X}}; } - }); - m_rules.push_back({ - {Instruction::ISZERO, {{Instruction::XOR, {X, Y}}}}, - [=]() -> Pattern { return { Instruction::EQ, {X, Y} }; } - }); - // Associative operations - for (auto const& opFun: vector>>{ - {Instruction::ADD, plus()}, - {Instruction::MUL, multiplies()}, - {Instruction::AND, bit_and()}, - {Instruction::OR, bit_or()}, - {Instruction::XOR, bit_xor()} - }) - { - auto op = opFun.first; - auto fun = opFun.second; - // Moving constants to the outside, order matters here! - // we need actions that return expressions (or patterns?) here, and we need also reversed rules - // (X+A)+B -> X+(A+B) - m_rules += vector>>{{ - {op, {{op, {X, A}}, B}}, - [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } - }, { - // X+(Y+A) -> (X+Y)+A - {op, {{op, {X, A}}, Y}}, - [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } - }, { - // For now, we still need explicit commutativity for the inner pattern - {op, {{op, {A, X}}, B}}, - [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } - }, { - {op, {{op, {A, X}}, Y}}, - [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } - }}; - } - // move constants across subtractions - m_rules += vector>>{ - { - // X - A -> X + (-A) - {Instruction::SUB, {X, A}}, - [=]() -> Pattern { return {Instruction::ADD, {X, 0 - A.d()}}; } - }, { - // (X + A) - Y -> (X - Y) + A - {Instruction::SUB, {{Instruction::ADD, {X, A}}, Y}}, - [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } - }, { - // (A + X) - Y -> (X - Y) + A - {Instruction::SUB, {{Instruction::ADD, {A, X}}, Y}}, - [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } - }, { - // X - (Y + A) -> (X - Y) + (-A) - {Instruction::SUB, {X, {Instruction::ADD, {Y, A}}}}, - [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } - }, { - // X - (A + Y) -> (X - Y) + (-A) - {Instruction::SUB, {X, {Instruction::ADD, {A, Y}}}}, - [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } - } - }; -} - ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, bool _secondRun) { static Rules rules; @@ -376,21 +192,17 @@ ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, ) return -1; - for (auto const& rule: rules.rules()) + if (auto match = rules.findFirstMatch(_expr, *this)) { - rules.resetMatchGroups(); - if (rule.first.matches(_expr, *this)) - { - // Debug info - //cout << "Simplifying " << *_expr.item << "("; - //for (Id arg: _expr.arguments) - // cout << fullDAGToString(arg) << ", "; - //cout << ")" << endl; - //cout << "with rule " << rule.first.toString() << endl; - //ExpressionTemplate t(rule.second()); - //cout << "to " << rule.second().toString() << endl; - return rebuildExpression(ExpressionTemplate(rule.second(), _expr.item->location())); - } + // Debug info + //cout << "Simplifying " << *_expr.item << "("; + //for (Id arg: _expr.arguments) + // cout << fullDAGToString(arg) << ", "; + //cout << ")" << endl; + //cout << "with rule " << match->first.toString() << endl; + //ExpressionTemplate t(match->second()); + //cout << "to " << match->second().toString() << endl; + return rebuildExpression(ExpressionTemplate(match->second(), _expr.item->location())); } if (!_secondRun && _expr.arguments.size() == 2 && SemanticInformation::isCommutativeOperation(*_expr.item)) @@ -413,131 +225,3 @@ ExpressionClasses::Id ExpressionClasses::rebuildExpression(ExpressionTemplate co arguments.push_back(rebuildExpression(t)); return find(_template.item, arguments); } - - -Pattern::Pattern(Instruction _instruction, std::vector const& _arguments): - m_type(Operation), - m_instruction(_instruction), - m_arguments(_arguments) -{ -} - -void Pattern::setMatchGroup(unsigned _group, map& _matchGroups) -{ - m_matchGroup = _group; - m_matchGroups = &_matchGroups; -} - -bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const -{ - if (!matchesBaseItem(_expr.item)) - return false; - if (m_matchGroup) - { - if (!m_matchGroups->count(m_matchGroup)) - (*m_matchGroups)[m_matchGroup] = &_expr; - else if ((*m_matchGroups)[m_matchGroup]->id != _expr.id) - return false; - } - assertThrow(m_arguments.size() == 0 || _expr.arguments.size() == m_arguments.size(), OptimizerException, ""); - for (size_t i = 0; i < m_arguments.size(); ++i) - if (!m_arguments[i].matches(_classes.representative(_expr.arguments[i]), _classes)) - return false; - return true; -} - -AssemblyItem Pattern::toAssemblyItem(SourceLocation const& _location) const -{ - if (m_type == Operation) - return AssemblyItem(m_instruction, _location); - else - return AssemblyItem(m_type, data(), _location); -} - -string Pattern::toString() const -{ - stringstream s; - switch (m_type) - { - case Operation: - s << instructionInfo(m_instruction).name; - break; - case Push: - s << "PUSH " << hex << data(); - break; - case UndefinedItem: - s << "ANY"; - break; - default: - s << "t=" << dec << m_type << " d=" << hex << data(); - break; - } - if (!m_requireDataMatch) - s << " ~"; - if (m_matchGroup) - s << "[" << dec << m_matchGroup << "]"; - s << "("; - for (Pattern const& p: m_arguments) - s << p.toString() << ", "; - s << ")"; - return s.str(); -} - -bool Pattern::matchesBaseItem(AssemblyItem const* _item) const -{ - if (m_type == UndefinedItem) - return true; - if (!_item) - return false; - if (m_type != _item->type()) - return false; - else if (m_type == Operation) - return m_instruction == _item->instruction(); - else if (m_requireDataMatch) - return data() == _item->data(); - return true; -} - -Pattern::Expression const& Pattern::matchGroupValue() const -{ - assertThrow(m_matchGroup > 0, OptimizerException, ""); - assertThrow(!!m_matchGroups, OptimizerException, ""); - assertThrow((*m_matchGroups)[m_matchGroup], OptimizerException, ""); - return *(*m_matchGroups)[m_matchGroup]; -} - -u256 const& Pattern::data() const -{ - assertThrow(m_data, OptimizerException, ""); - return *m_data; -} - -ExpressionTemplate::ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location) -{ - if (_pattern.matchGroup()) - { - hasId = true; - id = _pattern.id(); - } - else - { - hasId = false; - item = _pattern.toAssemblyItem(_location); - } - for (auto const& arg: _pattern.arguments()) - arguments.push_back(ExpressionTemplate(arg, _location)); -} - -string ExpressionTemplate::toString() const -{ - stringstream s; - if (hasId) - s << id; - else - s << item; - s << "("; - for (auto const& arg: arguments) - s << arg.toString(); - s << ")"; - return s.str(); -} diff --git a/libevmasm/ExpressionClasses.h b/libevmasm/ExpressionClasses.h index c91b227f..5d53b292 100644 --- a/libevmasm/ExpressionClasses.h +++ b/libevmasm/ExpressionClasses.h @@ -121,72 +121,5 @@ private: std::vector> m_spareAssemblyItems; }; -/** - * Pattern to match against an expression. - * Also stores matched expressions to retrieve them later, for constructing new expressions using - * ExpressionTemplate. - */ -class Pattern -{ -public: - using Expression = ExpressionClasses::Expression; - using Id = ExpressionClasses::Id; - - // Matches a specific constant value. - Pattern(unsigned _value): Pattern(u256(_value)) {} - // Matches a specific constant value. - Pattern(u256 const& _value): m_type(Push), m_requireDataMatch(true), m_data(std::make_shared(_value)) {} - // Matches a specific assembly item type or anything if not given. - Pattern(AssemblyItemType _type = UndefinedItem): m_type(_type) {} - // Matches a given instruction with given arguments - Pattern(Instruction _instruction, std::vector const& _arguments = {}); - /// Sets this pattern to be part of the match group with the identifier @a _group. - /// Inside one rule, all patterns in the same match group have to match expressions from the - /// same expression equivalence class. - void setMatchGroup(unsigned _group, std::map& _matchGroups); - unsigned matchGroup() const { return m_matchGroup; } - bool matches(Expression const& _expr, ExpressionClasses const& _classes) const; - - AssemblyItem toAssemblyItem(SourceLocation const& _location) const; - std::vector arguments() const { return m_arguments; } - - /// @returns the id of the matched expression if this pattern is part of a match group. - Id id() const { return matchGroupValue().id; } - /// @returns the data of the matched expression if this pattern is part of a match group. - u256 const& d() const { return matchGroupValue().item->data(); } - - std::string toString() const; - -private: - bool matchesBaseItem(AssemblyItem const* _item) const; - Expression const& matchGroupValue() const; - u256 const& data() const; - - AssemblyItemType m_type; - bool m_requireDataMatch = false; - Instruction m_instruction; ///< Only valid if m_type is Operation - std::shared_ptr m_data; ///< Only valid if m_type is not Operation - std::vector m_arguments; - unsigned m_matchGroup = 0; - std::map* m_matchGroups = nullptr; -}; - -/** - * Template for a new expression that can be built from matched patterns. - */ -struct ExpressionTemplate -{ - using Expression = ExpressionClasses::Expression; - using Id = ExpressionClasses::Id; - explicit ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location); - std::string toString() const; - bool hasId = false; - /// Id of the matched expression, if available. - Id id = Id(-1); - // Otherwise, assembly item. - AssemblyItem item = UndefinedItem; - std::vector arguments; -}; - } } diff --git a/libevmasm/SimplificationRules.cpp b/libevmasm/SimplificationRules.cpp new file mode 100644 index 00000000..bb950400 --- /dev/null +++ b/libevmasm/SimplificationRules.cpp @@ -0,0 +1,363 @@ +/* + This file is part of solidity. + + solidity is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + solidity is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with solidity. If not, see . +*/ +/** + * @file ExpressionClasses.cpp + * @author Christian + * @date 2015 + * Container for equivalence classes of expressions for use in common subexpression elimination. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace dev; +using namespace dev::eth; + + +pair > const* Rules::findFirstMatch( + Expression const& _expr, + ExpressionClasses const& _classes +) +{ + resetMatchGroups(); + + assertThrow(_expr.item, OptimizerException, ""); + for (auto const& rule: m_rules[byte(_expr.item->instruction())]) + { + if (rule.first.matches(_expr, _classes)) + return &rule; + } + return nullptr; +} + +void Rules::addRules(std::vector > > const& _rules) +{ + for (auto const& r: _rules) + addRule(r); +} + +void Rules::addRule(std::pair > const& _rule) +{ + m_rules[byte(_rule.first.instruction())].push_back(_rule); +} + +template S divWorkaround(S const& _a, S const& _b) +{ + return (S)(bigint(_a) / bigint(_b)); +} + +template S modWorkaround(S const& _a, S const& _b) +{ + return (S)(bigint(_a) % bigint(_b)); +} + +Rules::Rules() +{ + // Multiple occurences of one of these inside one rule must match the same equivalence class. + // Constants. + Pattern A(Push); + Pattern B(Push); + Pattern C(Push); + // Anything. + Pattern X; + Pattern Y; + Pattern Z; + A.setMatchGroup(1, m_matchGroups); + B.setMatchGroup(2, m_matchGroups); + C.setMatchGroup(3, m_matchGroups); + X.setMatchGroup(4, m_matchGroups); + Y.setMatchGroup(5, m_matchGroups); + Z.setMatchGroup(6, m_matchGroups); + + addRules(vector>>{ + // arithmetics on constants + {{Instruction::ADD, {A, B}}, [=]{ return A.d() + B.d(); }}, + {{Instruction::MUL, {A, B}}, [=]{ return A.d() * B.d(); }}, + {{Instruction::SUB, {A, B}}, [=]{ return A.d() - B.d(); }}, + {{Instruction::DIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : divWorkaround(A.d(), B.d()); }}, + {{Instruction::SDIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(divWorkaround(u2s(A.d()), u2s(B.d()))); }}, + {{Instruction::MOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : modWorkaround(A.d(), B.d()); }}, + {{Instruction::SMOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(modWorkaround(u2s(A.d()), u2s(B.d()))); }}, + {{Instruction::EXP, {A, B}}, [=]{ return u256(boost::multiprecision::powm(bigint(A.d()), bigint(B.d()), bigint(1) << 256)); }}, + {{Instruction::NOT, {A}}, [=]{ return ~A.d(); }}, + {{Instruction::LT, {A, B}}, [=]() { return A.d() < B.d() ? u256(1) : 0; }}, + {{Instruction::GT, {A, B}}, [=]() -> u256 { return A.d() > B.d() ? 1 : 0; }}, + {{Instruction::SLT, {A, B}}, [=]() -> u256 { return u2s(A.d()) < u2s(B.d()) ? 1 : 0; }}, + {{Instruction::SGT, {A, B}}, [=]() -> u256 { return u2s(A.d()) > u2s(B.d()) ? 1 : 0; }}, + {{Instruction::EQ, {A, B}}, [=]() -> u256 { return A.d() == B.d() ? 1 : 0; }}, + {{Instruction::ISZERO, {A}}, [=]() -> u256 { return A.d() == 0 ? 1 : 0; }}, + {{Instruction::AND, {A, B}}, [=]{ return A.d() & B.d(); }}, + {{Instruction::OR, {A, B}}, [=]{ return A.d() | B.d(); }}, + {{Instruction::XOR, {A, B}}, [=]{ return A.d() ^ B.d(); }}, + {{Instruction::BYTE, {A, B}}, [=]{ return A.d() >= 32 ? 0 : (B.d() >> unsigned(8 * (31 - A.d()))) & 0xff; }}, + {{Instruction::ADDMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) + bigint(B.d())) % C.d()); }}, + {{Instruction::MULMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) * bigint(B.d())) % C.d()); }}, + {{Instruction::MULMOD, {A, B, C}}, [=]{ return A.d() * B.d(); }}, + {{Instruction::SIGNEXTEND, {A, B}}, [=]() -> u256 { + if (A.d() >= 31) + return B.d(); + unsigned testBit = unsigned(A.d()) * 8 + 7; + u256 mask = (u256(1) << testBit) - 1; + return u256(boost::multiprecision::bit_test(B.d(), testBit) ? B.d() | ~mask : B.d() & mask); + }}, + + // invariants involving known constants + {{Instruction::ADD, {X, 0}}, [=]{ return X; }}, + {{Instruction::SUB, {X, 0}}, [=]{ return X; }}, + {{Instruction::MUL, {X, 1}}, [=]{ return X; }}, + {{Instruction::DIV, {X, 1}}, [=]{ return X; }}, + {{Instruction::SDIV, {X, 1}}, [=]{ return X; }}, + {{Instruction::OR, {X, 0}}, [=]{ return X; }}, + {{Instruction::XOR, {X, 0}}, [=]{ return X; }}, + {{Instruction::AND, {X, ~u256(0)}}, [=]{ return X; }}, + {{Instruction::AND, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::MUL, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::DIV, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::DIV, {0, X}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {0, X}}, [=]{ return u256(0); }}, + {{Instruction::OR, {X, ~u256(0)}}, [=]{ return ~u256(0); }}, + {{Instruction::EQ, {X, 0}}, [=]() -> Pattern { return {Instruction::ISZERO, {X}}; } }, + // operations involving an expression and itself + {{Instruction::AND, {X, X}}, [=]{ return X; }}, + {{Instruction::OR, {X, X}}, [=]{ return X; }}, + {{Instruction::XOR, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SUB, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::EQ, {X, X}}, [=]{ return u256(1); }}, + {{Instruction::LT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SLT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::GT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SGT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {X, X}}, [=]{ return u256(0); }}, + + {{Instruction::NOT, {{Instruction::NOT, {X}}}}, [=]{ return X; }}, + {{Instruction::XOR, {{{X}, {Instruction::XOR, {X, Y}}}}}, [=]{ return Y; }}, + {{Instruction::OR, {{{X}, {Instruction::AND, {X, Y}}}}}, [=]{ return X; }}, + {{Instruction::AND, {{{X}, {Instruction::OR, {X, Y}}}}}, [=]{ return X; }}, + {{Instruction::AND, {{{X}, {Instruction::NOT, {X}}}}}, [=]{ return u256(0); }}, + {{Instruction::OR, {{{X}, {Instruction::NOT, {X}}}}}, [=]{ return ~u256(0); }}, + }); + // Double negation of opcodes with binary result + for (auto const& op: vector{ + Instruction::EQ, + Instruction::LT, + Instruction::SLT, + Instruction::GT, + Instruction::SGT + }) + addRule({ + {Instruction::ISZERO, {{Instruction::ISZERO, {{op, {X, Y}}}}}}, + [=]() -> Pattern { return {op, {X, Y}}; } + }); + addRule({ + {Instruction::ISZERO, {{Instruction::ISZERO, {{Instruction::ISZERO, {X}}}}}}, + [=]() -> Pattern { return {Instruction::ISZERO, {X}}; } + }); + addRule({ + {Instruction::ISZERO, {{Instruction::XOR, {X, Y}}}}, + [=]() -> Pattern { return { Instruction::EQ, {X, Y} }; } + }); + // Associative operations + for (auto const& opFun: vector>>{ + {Instruction::ADD, plus()}, + {Instruction::MUL, multiplies()}, + {Instruction::AND, bit_and()}, + {Instruction::OR, bit_or()}, + {Instruction::XOR, bit_xor()} + }) + { + auto op = opFun.first; + auto fun = opFun.second; + // Moving constants to the outside, order matters here! + // we need actions that return expressions (or patterns?) here, and we need also reversed rules + // (X+A)+B -> X+(A+B) + addRules(vector>>{{ + {op, {{op, {X, A}}, B}}, + [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } + }, { + // X+(Y+A) -> (X+Y)+A + {op, {{op, {X, A}}, Y}}, + [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } + }, { + // For now, we still need explicit commutativity for the inner pattern + {op, {{op, {A, X}}, B}}, + [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } + }, { + {op, {{op, {A, X}}, Y}}, + [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } + }}); + } + // move constants across subtractions + addRules(vector>>{ + { + // X - A -> X + (-A) + {Instruction::SUB, {X, A}}, + [=]() -> Pattern { return {Instruction::ADD, {X, 0 - A.d()}}; } + }, { + // (X + A) - Y -> (X - Y) + A + {Instruction::SUB, {{Instruction::ADD, {X, A}}, Y}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } + }, { + // (A + X) - Y -> (X - Y) + A + {Instruction::SUB, {{Instruction::ADD, {A, X}}, Y}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } + }, { + // X - (Y + A) -> (X - Y) + (-A) + {Instruction::SUB, {X, {Instruction::ADD, {Y, A}}}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } + }, { + // X - (A + Y) -> (X - Y) + (-A) + {Instruction::SUB, {X, {Instruction::ADD, {A, Y}}}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } + } + }); +} + +Pattern::Pattern(Instruction _instruction, std::vector const& _arguments): + m_type(Operation), + m_instruction(_instruction), + m_arguments(_arguments) +{ +} + +void Pattern::setMatchGroup(unsigned _group, map& _matchGroups) +{ + m_matchGroup = _group; + m_matchGroups = &_matchGroups; +} + +bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const +{ + if (!matchesBaseItem(_expr.item)) + return false; + if (m_matchGroup) + { + if (!m_matchGroups->count(m_matchGroup)) + (*m_matchGroups)[m_matchGroup] = &_expr; + else if ((*m_matchGroups)[m_matchGroup]->id != _expr.id) + return false; + } + assertThrow(m_arguments.size() == 0 || _expr.arguments.size() == m_arguments.size(), OptimizerException, ""); + for (size_t i = 0; i < m_arguments.size(); ++i) + if (!m_arguments[i].matches(_classes.representative(_expr.arguments[i]), _classes)) + return false; + return true; +} + +AssemblyItem Pattern::toAssemblyItem(SourceLocation const& _location) const +{ + if (m_type == Operation) + return AssemblyItem(m_instruction, _location); + else + return AssemblyItem(m_type, data(), _location); +} + +string Pattern::toString() const +{ + stringstream s; + switch (m_type) + { + case Operation: + s << instructionInfo(m_instruction).name; + break; + case Push: + s << "PUSH " << hex << data(); + break; + case UndefinedItem: + s << "ANY"; + break; + default: + s << "t=" << dec << m_type << " d=" << hex << data(); + break; + } + if (!m_requireDataMatch) + s << " ~"; + if (m_matchGroup) + s << "[" << dec << m_matchGroup << "]"; + s << "("; + for (Pattern const& p: m_arguments) + s << p.toString() << ", "; + s << ")"; + return s.str(); +} + +bool Pattern::matchesBaseItem(AssemblyItem const* _item) const +{ + if (m_type == UndefinedItem) + return true; + if (!_item) + return false; + if (m_type != _item->type()) + return false; + else if (m_type == Operation) + return m_instruction == _item->instruction(); + else if (m_requireDataMatch) + return data() == _item->data(); + return true; +} + +Pattern::Expression const& Pattern::matchGroupValue() const +{ + assertThrow(m_matchGroup > 0, OptimizerException, ""); + assertThrow(!!m_matchGroups, OptimizerException, ""); + assertThrow((*m_matchGroups)[m_matchGroup], OptimizerException, ""); + return *(*m_matchGroups)[m_matchGroup]; +} + +u256 const& Pattern::data() const +{ + assertThrow(m_data, OptimizerException, ""); + return *m_data; +} + +ExpressionTemplate::ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location) +{ + if (_pattern.matchGroup()) + { + hasId = true; + id = _pattern.id(); + } + else + { + hasId = false; + item = _pattern.toAssemblyItem(_location); + } + for (auto const& arg: _pattern.arguments()) + arguments.push_back(ExpressionTemplate(arg, _location)); +} + +string ExpressionTemplate::toString() const +{ + stringstream s; + if (hasId) + s << id; + else + s << item; + s << "("; + for (auto const& arg: arguments) + s << arg.toString(); + s << ")"; + return s.str(); +} diff --git a/libevmasm/SimplificationRules.h b/libevmasm/SimplificationRules.h new file mode 100644 index 00000000..a4da5849 --- /dev/null +++ b/libevmasm/SimplificationRules.h @@ -0,0 +1,140 @@ +/* + This file is part of solidity. + + solidity is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + solidity is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with solidity. If not, see . +*/ +/** + * @file SimplificationRules + * @author Christian + * @date 2017 + * Module for applying replacement rules against Expressions. + */ + +#pragma once + +#include + +#include +#include + +namespace dev +{ +namespace eth +{ + +class Pattern; + +/** + * Container for all simplification rules. + */ +class Rules: public boost::noncopyable +{ +public: + using Expression = ExpressionClasses::Expression; + + Rules(); + + /// @returns a pointer to the first matching pattern and sets the match + /// groups accordingly. + std::pair> const* findFirstMatch( + Expression const& _expr, + ExpressionClasses const& _classes + ); + +private: + void addRules(std::vector>> const& _rules); + void addRule(std::pair> const& _rule); + + void resetMatchGroups() { m_matchGroups.clear(); } + + std::map m_matchGroups; + std::vector>> m_rules[256]; +}; + +/** + * Pattern to match against an expression. + * Also stores matched expressions to retrieve them later, for constructing new expressions using + * ExpressionTemplate. + */ +class Pattern +{ +public: + using Expression = ExpressionClasses::Expression; + using Id = ExpressionClasses::Id; + + // Matches a specific constant value. + Pattern(unsigned _value): Pattern(u256(_value)) {} + // Matches a specific constant value. + Pattern(u256 const& _value): m_type(Push), m_requireDataMatch(true), m_data(std::make_shared(_value)) {} + // Matches a specific assembly item type or anything if not given. + Pattern(AssemblyItemType _type = UndefinedItem): m_type(_type) {} + // Matches a given instruction with given arguments + Pattern(Instruction _instruction, std::vector const& _arguments = {}); + /// Sets this pattern to be part of the match group with the identifier @a _group. + /// Inside one rule, all patterns in the same match group have to match expressions from the + /// same expression equivalence class. + void setMatchGroup(unsigned _group, std::map& _matchGroups); + unsigned matchGroup() const { return m_matchGroup; } + bool matches(Expression const& _expr, ExpressionClasses const& _classes) const; + + AssemblyItem toAssemblyItem(SourceLocation const& _location) const; + std::vector arguments() const { return m_arguments; } + + /// @returns the id of the matched expression if this pattern is part of a match group. + Id id() const { return matchGroupValue().id; } + /// @returns the data of the matched expression if this pattern is part of a match group. + u256 const& d() const { return matchGroupValue().item->data(); } + + std::string toString() const; + + AssemblyItemType type() const { return m_type; } + Instruction instruction() const + { + assertThrow(type() == Operation, OptimizerException, ""); + return m_instruction; + } + +private: + bool matchesBaseItem(AssemblyItem const* _item) const; + Expression const& matchGroupValue() const; + u256 const& data() const; + + AssemblyItemType m_type; + bool m_requireDataMatch = false; + Instruction m_instruction; ///< Only valid if m_type is Operation + std::shared_ptr m_data; ///< Only valid if m_type is not Operation + std::vector m_arguments; + unsigned m_matchGroup = 0; + std::map* m_matchGroups = nullptr; +}; + +/** + * Template for a new expression that can be built from matched patterns. + */ +struct ExpressionTemplate +{ + using Expression = ExpressionClasses::Expression; + using Id = ExpressionClasses::Id; + explicit ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location); + std::string toString() const; + bool hasId = false; + /// Id of the matched expression, if available. + Id id = Id(-1); + // Otherwise, assembly item. + AssemblyItem item = UndefinedItem; + std::vector arguments; +}; + +} +} -- cgit From da5e171f3b7a8d16fb01f4366f61a301567f1ae2 Mon Sep 17 00:00:00 2001 From: chriseth Date: Fri, 6 Jan 2017 17:05:27 +0100 Subject: Optimize constant optimization. --- libevmasm/ConstantOptimiser.cpp | 76 +++++++++++++++++++++++++---------------- libevmasm/ConstantOptimiser.h | 19 ++++++----- 2 files changed, 58 insertions(+), 37 deletions(-) diff --git a/libevmasm/ConstantOptimiser.cpp b/libevmasm/ConstantOptimiser.cpp index f4a50c2d..601ce812 100644 --- a/libevmasm/ConstantOptimiser.cpp +++ b/libevmasm/ConstantOptimiser.cpp @@ -38,6 +38,7 @@ unsigned ConstantOptimisationMethod::optimiseConstants( for (AssemblyItem const& item: _items) if (item.type() == Push) pushes[item]++; + map pendingReplacements; for (auto it: pushes) { AssemblyItem const& item = it.first; @@ -53,17 +54,22 @@ unsigned ConstantOptimisationMethod::optimiseConstants( bigint copyGas = copy.gasNeeded(); ComputeMethod compute(params, item.data()); bigint computeGas = compute.gasNeeded(); + AssemblyItems replacement; if (copyGas < literalGas && copyGas < computeGas) { - copy.execute(_assembly, _items); + replacement = copy.execute(_assembly); optimisations++; } else if (computeGas < literalGas && computeGas < copyGas) { - compute.execute(_assembly, _items); + replacement = compute.execute(_assembly); optimisations++; } + if (!replacement.empty()) + pendingReplacements[item.data()] = replacement; } + if (!pendingReplacements.empty()) + replaceConstants(_items, pendingReplacements); return optimisations; } @@ -101,18 +107,24 @@ size_t ConstantOptimisationMethod::bytesRequired(AssemblyItems const& _items) void ConstantOptimisationMethod::replaceConstants( AssemblyItems& _items, - AssemblyItems const& _replacement -) const + map const& _replacements +) { - assertThrow(_items.size() > 0, OptimizerException, ""); - for (size_t i = 0; i < _items.size(); ++i) + AssemblyItems replaced; + for (AssemblyItem const& item: _items) { - if (_items.at(i) != AssemblyItem(m_value)) - continue; - _items[i] = _replacement[0]; - _items.insert(_items.begin() + i + 1, _replacement.begin() + 1, _replacement.end()); - i += _replacement.size() - 1; + if (item.type() == Push) + { + auto it = _replacements.find(item.data()); + if (it != _replacements.end()) + { + replaced += it->second; + continue; + } + } + replaced.push_back(item); } + _items = std::move(replaced); } bigint LiteralMethod::gasNeeded() @@ -128,38 +140,44 @@ bigint LiteralMethod::gasNeeded() CodeCopyMethod::CodeCopyMethod(Params const& _params, u256 const& _value): ConstantOptimisationMethod(_params, _value) { - m_copyRoutine = AssemblyItems{ - u256(0), - Instruction::DUP1, - Instruction::MLOAD, // back up memory - u256(32), - AssemblyItem(PushData, u256(1) << 16), // has to be replaced - Instruction::DUP4, - Instruction::CODECOPY, - Instruction::DUP2, - Instruction::MLOAD, - Instruction::SWAP2, - Instruction::MSTORE - }; } bigint CodeCopyMethod::gasNeeded() { return combineGas( // Run gas: we ignore memory increase costs - simpleRunGas(m_copyRoutine) + GasCosts::copyGas, + simpleRunGas(copyRoutine()) + GasCosts::copyGas, // Data gas for copy routines: Some bytes are zero, but we ignore them. - bytesRequired(m_copyRoutine) * (m_params.isCreation ? GasCosts::txDataNonZeroGas : GasCosts::createDataGas), + bytesRequired(copyRoutine()) * (m_params.isCreation ? GasCosts::txDataNonZeroGas : GasCosts::createDataGas), // Data gas for data itself dataGas(toBigEndian(m_value)) ); } -void CodeCopyMethod::execute(Assembly& _assembly, AssemblyItems& _items) +AssemblyItems CodeCopyMethod::execute(Assembly& _assembly) { bytes data = toBigEndian(m_value); - m_copyRoutine[4] = _assembly.newData(data); - replaceConstants(_items, m_copyRoutine); + AssemblyItems actualCopyRoutine = copyRoutine(); + actualCopyRoutine[4] = _assembly.newData(data); + return actualCopyRoutine; +} + +AssemblyItems const& CodeCopyMethod::copyRoutine() const +{ + AssemblyItems static copyRoutine{ + u256(0), + Instruction::DUP1, + Instruction::MLOAD, // back up memory + u256(32), + AssemblyItem(PushData, u256(1) << 16), // has to be replaced + Instruction::DUP4, + Instruction::CODECOPY, + Instruction::DUP2, + Instruction::MLOAD, + Instruction::SWAP2, + Instruction::MSTORE + }; + return copyRoutine; } AssemblyItems ComputeMethod::findRepresentation(u256 const& _value) diff --git a/libevmasm/ConstantOptimiser.h b/libevmasm/ConstantOptimiser.h index b35b2a69..dfa2fbf8 100644 --- a/libevmasm/ConstantOptimiser.h +++ b/libevmasm/ConstantOptimiser.h @@ -60,7 +60,10 @@ public: explicit ConstantOptimisationMethod(Params const& _params, u256 const& _value): m_params(_params), m_value(_value) {} virtual bigint gasNeeded() = 0; - virtual void execute(Assembly& _assembly, AssemblyItems& _items) = 0; + /// Executes the method, potentially appending to the assembly and returns a vector of + /// assembly items the constant should be relpaced with in one sweep. + /// If the vector is empty, the constants will not be deleted. + virtual AssemblyItems execute(Assembly& _assembly) = 0; protected: size_t dataSize() const { return std::max(1, dev::bytesRequired(m_value)); } @@ -83,8 +86,8 @@ protected: return m_params.runs * _runGas + m_params.multiplicity * _repeatedDataGas + _uniqueDataGas; } - /// Replaces the constant by the code given in @a _replacement. - void replaceConstants(AssemblyItems& _items, AssemblyItems const& _replacement) const; + /// Replaces all constants i by the code given in @a _replacement[i]. + static void replaceConstants(AssemblyItems& _items, std::map const& _replacement); Params m_params; u256 const& m_value; @@ -100,7 +103,7 @@ public: explicit LiteralMethod(Params const& _params, u256 const& _value): ConstantOptimisationMethod(_params, _value) {} virtual bigint gasNeeded() override; - virtual void execute(Assembly&, AssemblyItems&) override {} + virtual AssemblyItems execute(Assembly&) override { return AssemblyItems{}; } }; /** @@ -111,10 +114,10 @@ class CodeCopyMethod: public ConstantOptimisationMethod public: explicit CodeCopyMethod(Params const& _params, u256 const& _value); virtual bigint gasNeeded() override; - virtual void execute(Assembly& _assembly, AssemblyItems& _items) override; + virtual AssemblyItems execute(Assembly& _assembly) override; protected: - AssemblyItems m_copyRoutine; + AssemblyItems const& copyRoutine() const; }; /** @@ -130,9 +133,9 @@ public: } virtual bigint gasNeeded() override { return gasNeeded(m_routine); } - virtual void execute(Assembly&, AssemblyItems& _items) override + virtual AssemblyItems execute(Assembly&) override { - replaceConstants(_items, m_routine); + return m_routine; } protected: -- cgit From 7127f2fdfdc1bfb8efd9a1212725e36c3c42ad9a Mon Sep 17 00:00:00 2001 From: chriseth Date: Tue, 10 Jan 2017 16:35:10 +0100 Subject: Fix debug output. --- libevmasm/SimplificationRules.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/libevmasm/SimplificationRules.cpp b/libevmasm/SimplificationRules.cpp index bb950400..0e95b4bb 100644 --- a/libevmasm/SimplificationRules.cpp +++ b/libevmasm/SimplificationRules.cpp @@ -283,13 +283,19 @@ string Pattern::toString() const s << instructionInfo(m_instruction).name; break; case Push: - s << "PUSH " << hex << data(); + if (m_data) + s << "PUSH " << hex << data(); + else + s << "PUSH "; break; case UndefinedItem: s << "ANY"; break; default: - s << "t=" << dec << m_type << " d=" << hex << data(); + if (m_data) + s << "t=" << dec << m_type << " d=" << hex << data(); + else + s << "t=" << dec << m_type << " d: nullptr"; break; } if (!m_requireDataMatch) -- cgit From 58509ca3aa1f1c649303bca4e705c1c96e832c08 Mon Sep 17 00:00:00 2001 From: chriseth Date: Tue, 10 Jan 2017 16:35:18 +0100 Subject: Fix match groups. --- libevmasm/SimplificationRules.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/libevmasm/SimplificationRules.cpp b/libevmasm/SimplificationRules.cpp index 0e95b4bb..2976d95f 100644 --- a/libevmasm/SimplificationRules.cpp +++ b/libevmasm/SimplificationRules.cpp @@ -48,6 +48,7 @@ pair > const* Rules::findFirstMatch( { if (rule.first.matches(_expr, _classes)) return &rule; + resetMatchGroups(); } return nullptr; } -- cgit From d34b561f682336bef0258537f7fdf87b16f4a741 Mon Sep 17 00:00:00 2001 From: chriseth Date: Tue, 10 Jan 2017 17:38:42 +0100 Subject: Windows build fix. --- libevmasm/CommonSubexpressionEliminator.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libevmasm/CommonSubexpressionEliminator.cpp b/libevmasm/CommonSubexpressionEliminator.cpp index 6294e579..fd4fffa6 100644 --- a/libevmasm/CommonSubexpressionEliminator.cpp +++ b/libevmasm/CommonSubexpressionEliminator.cpp @@ -303,7 +303,9 @@ void CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) for (auto it: m_classPositions) for (auto p: it.second) if (p > m_stackHeight) + { assertThrow(false, OptimizerException, ""); + } // do some cleanup removeStackTopIfPossible(); -- cgit From a32def5c42af6785ca83842fb583f9bf9d427fd4 Mon Sep 17 00:00:00 2001 From: chriseth Date: Wed, 11 Jan 2017 10:25:48 +0100 Subject: Changelog entry. --- Changelog.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Changelog.md b/Changelog.md index 7676b5be..af484517 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,9 +1,10 @@ ### 0.4.8 (unreleased) Features: + * Optimiser: Performance improments. * Output: Print assembly in new standardized Solidity assembly format. -BugFixes: +Bugfixes: * Remappings: Prefer longer context over longer prefix. * Type checker, code generator: enable access to events of base contracts' names. * Imports: ``import ".dir/a"`` is not a relative path. Relative paths begin with directory ``.`` or ``..``. -- cgit From 3cfa851f6f4175ef17c4fc1079cda67b516a2d72 Mon Sep 17 00:00:00 2001 From: chriseth Date: Thu, 12 Jan 2017 17:52:23 +0100 Subject: Minor changes from review. --- Changelog.md | 2 +- cmake/EthCompilerSettings.cmake | 2 +- libevmasm/AssemblyItem.h | 1 - libevmasm/ConstantOptimiser.cpp | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Changelog.md b/Changelog.md index af484517..3e999d13 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,7 +1,7 @@ ### 0.4.8 (unreleased) Features: - * Optimiser: Performance improments. + * Optimiser: Performance improvements. * Output: Print assembly in new standardized Solidity assembly format. Bugfixes: diff --git a/cmake/EthCompilerSettings.cmake b/cmake/EthCompilerSettings.cmake index 65742dfb..c734423b 100644 --- a/cmake/EthCompilerSettings.cmake +++ b/cmake/EthCompilerSettings.cmake @@ -71,7 +71,7 @@ if (("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU") OR ("${CMAKE_CXX_COMPILER_ID}" MA add_compile_options(-fPIC) # Configuration-specific compiler settings. - set(CMAKE_CXX_FLAGS_DEBUG " -g -DETH_DEBUG") + set(CMAKE_CXX_FLAGS_DEBUG "-Og -g -DETH_DEBUG") set(CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG") set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g") diff --git a/libevmasm/AssemblyItem.h b/libevmasm/AssemblyItem.h index 7fcaa145..002b3c87 100644 --- a/libevmasm/AssemblyItem.h +++ b/libevmasm/AssemblyItem.h @@ -86,7 +86,6 @@ public: AssemblyItemType type() const { return m_type; } u256 const& data() const { assertThrow(m_type != Operation, Exception, ""); return *m_data; } - //void setType(AssemblyItemType const _type) { m_type = _type; } void setData(u256 const& _data) { assertThrow(m_type != Operation, Exception, ""); m_data = std::make_shared(_data); } /// @returns the instruction of this item (only valid if type() == Operation) diff --git a/libevmasm/ConstantOptimiser.cpp b/libevmasm/ConstantOptimiser.cpp index 601ce812..86244e17 100644 --- a/libevmasm/ConstantOptimiser.cpp +++ b/libevmasm/ConstantOptimiser.cpp @@ -60,7 +60,7 @@ unsigned ConstantOptimisationMethod::optimiseConstants( replacement = copy.execute(_assembly); optimisations++; } - else if (computeGas < literalGas && computeGas < copyGas) + else if (computeGas < literalGas && computeGas <= copyGas) { replacement = compute.execute(_assembly); optimisations++; -- cgit From e22672b7c739dde9f37a919e63245abda4b1fc89 Mon Sep 17 00:00:00 2001 From: chriseth Date: Fri, 13 Jan 2017 12:18:22 +0100 Subject: Update Changelog.md --- Changelog.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Changelog.md b/Changelog.md index bef98f15..b8effc19 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,4 +1,4 @@ -### 0.4.8 (unreleased) +### 0.4.8 (2017-01-13) Features: * Optimiser: Performance improvements. -- cgit