diff options
40 files changed, 2170 insertions, 661 deletions
diff --git a/.travis.yml b/.travis.yml index 2160d175..017f1a61 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,11 +26,13 @@ language: cpp branches: - # We need to whitelist the branches which we want to have "push" automation. + # We need to whitelist the branches which we want to have "push" automation, + # this includes tags (which are treated as branches by travis). # Pull request automation is not constrained to this set of branches. only: - develop - release + - /^v[0-9]/ matrix: include: # Ubuntu 14.04 LTS "Trusty Tahr" @@ -71,6 +73,7 @@ matrix: dist: trusty sudo: required compiler: gcc + node_js: stable services: - docker before_install: @@ -137,11 +140,12 @@ cache: directories: - boost_1_57_0 - build + - $HOME/.local install: - test $TRAVIS_INSTALL_DEPS != On || ./scripts/install_deps.sh + - test "$TRAVIS_OS_NAME" != "linux" || ./scripts/install_cmake.sh - echo -n "$TRAVIS_COMMIT" > commit_hash.txt - - test "$TRAVIS_PULL_REQUESTS" != "false" || test "$TRAVIS_BRANCH" != release || echo -n > prerelease.txt # this is a proper release before_script: - test $TRAVIS_EMSCRIPTEN != On || ./scripts/build_emscripten.sh - test $TRAVIS_RELEASE != On || (mkdir -p build @@ -149,7 +153,8 @@ before_script: && cmake .. -DCMAKE_BUILD_TYPE=$TRAVIS_BUILD_TYPE && make -j2 && cd .. - && ./scripts/release.sh $ZIP_SUFFIX ) + && ./scripts/release.sh $ZIP_SUFFIX + && ./scripts/create_source_tarball.sh ) script: - test $TRAVIS_DOCS != On || ./scripts/docs.sh @@ -190,43 +195,20 @@ deploy: - release # This is the deploy target for the native build (Linux and macOS) - # which generates ZIPs per commit. We are in agreement that - # generating ZIPs per commit for the develop branch is probably - # just noise, so we only run this deployment target on 'release'. - # - # Unlike the Appveyor GitHub Releases target, the support in TravisCI - # seemingly doesn't provide a means for passing a description, tag, etc. - # In practice, we are letting the Appveyor CI do all that stuff, and - # then this deployment flow just seems to find that most recent tag, - # and just add our Linux and macOS ZIPs into the same tag, which is - # what we want to happen. But is very accidental and brittle-looking. - # - # The 'skip_cleanup' stops the workspace being cleaned out prior to - # generation of the artifacts. Strange that we should explicitly - # need to do that, but we do. - # - # Tokens in TravisCI can be generated a few different ways. Bob had - # success using the 'travis' gem, and then using that gem to - # create/edit this .travis.yml file, and then cut-and-pasting the - # good bits back out of what it generated. The gem changes all the - # whitespace and deletes comments, so cannot be used as-is. But - # it does generate an appropriate auth token. - # - # TODO - I do not know if the api_key below which work correctly - # for ethereum/solidity. I suspect not, for the same reason as - # my auth token does not work for Appveyor. I don't have enough - # permissions to enable this myself. Christian should be able to. - # - # See https://docs.travis-ci.com/user/deployment/releases - # See https://blog.travis-ci.com/2013-01-28-token-token-token/ - # See https://github.com/ethereum/webthree-umbrella/issues/658 + # which generates ZIPs per commit and the source tarball. # + # This runs for each tag that is created and adds the corresponding files. - provider: releases api_key: secure: PWH37xVBCF0XiSjl+eH7XIdkrfxZXjzvqF4PiBOnD3VnFz+odrdnIwBmCeBYTHTWF8efpp8fmzWJk2UVq1JcpyZiC+SVxO8dx91W2ia1a+wKrEQuDgkUrZBkl5IQNCv0QS81DDQhliyZEaYh8wHO/7RReyMpGpw2U2u85WkFiZ+LdlHEZPfzUeh9lxQ9n8qwFL8Rja+Q05d4cQ8zaVEtofJJT4T6DUWhc3TzuxDYxOmjwg37rC9CkGSLn6VadSh8b3j5R0SZupFsAEvBL/imBLP9r9ewoo7o4p6By3jwiIgH9yNg7LM618xbffcNaYF/KtLBi9uPHfqF7hRD4PlECz+D0PR78nQItOX5HKm1QMg5kCnghRVCA0IVjpV5fiYQnMLM7dCRv34I5b3zLpa69wQ/GLYB2FViqNUfvPeiZTEeIJ2OmATlFx8AH2JoqpY1XJknWb35+vMfa8LSiJJW++SLWeV+ncC92hrvyZ1cy3trepRRZIfyYepxHifnfdWMkddQUJk5b2WS5Fy/TJLZNPeombnpvRhUC38dsYItarKeXTc6k4oADCEDZ2rgGIcEiqRxXV11Y5xHJekLDWzUs+YJNcCuL4pnAP//LOnbnH2w9rLpwhQYSl0anCd097NivAXQJXO2JI/byIYz1kiCVQWnW6EM8+72mLOklf/Qr8k= - file: $TRAVIS_BUILD_DIR/solidity-$ZIP_SUFFIX.zip + + overwrite: true + file_glob: true + file: + - $TRAVIS_BUILD_DIR/solidity*.zip + - $TRAVIS_BUILD_DIR/solidity*tar.gz skip_cleanup: true on: - repo: ethereum/solidity - branch: release + all_branches: true + tags: true condition: $TRAVIS_RELEASE == On diff --git a/CMakeLists.txt b/CMakeLists.txt index ee66eebd..ecd857ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ include(EthPolicy) eth_policy() # project name and version should be set after cmake_policy CMP0048 -set(PROJECT_VERSION "0.4.7") +set(PROJECT_VERSION "0.4.8") project(solidity VERSION ${PROJECT_VERSION}) # Let's find our dependencies diff --git a/Changelog.md b/Changelog.md index 096c72d4..b8effc19 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,3 +1,15 @@ +### 0.4.8 (2017-01-13) + +Features: + * Optimiser: Performance improvements. + * Output: Print assembly in new standardized Solidity assembly format. + +Bugfixes: + * Remappings: Prefer longer context over longer prefix. + * Type checker, code generator: enable access to events of base contracts' names. + * Imports: ``import ".dir/a"`` is not a relative path. Relative paths begin with directory ``.`` or ``..``. + * Type checker, disallow inheritances of different kinds (e.g. a function and a modifier) of members of the same name + ### 0.4.7 (2016-12-15) Features: diff --git a/docs/assembly.rst b/docs/assembly.rst new file mode 100644 index 00000000..57c0bf9b --- /dev/null +++ b/docs/assembly.rst @@ -0,0 +1,950 @@ +################# +Solidity Assembly +################# + +.. index:: ! assembly, ! asm, ! evmasm + +Solidity defines an assembly language that can also be used without Solidity. +This assembly language can also be used as "inline assembly" inside Solidity +source code. We start with describing how to use inline assembly and how it +differs from standalone assembly and then specify assembly itself. + +TODO: Write about how scoping rules of inline assembly are a bit different +and the complications that arise when for example using internal functions +of libraries. Furhermore, write about the symbols defined by the compiler. + +Inline Assembly +=============== + +For more fine-grained control especially in order to enhance the language by writing libraries, +it is possible to interleave Solidity statements with inline assembly in a language close +to the one of the virtual machine. Due to the fact that the EVM is a stack machine, it is +often hard to address the correct stack slot and provide arguments to opcodes at the correct +point on the stack. Solidity's inline assembly tries to facilitate that and other issues +arising when writing manual assembly by the following features: + +* functional-style opcodes: ``mul(1, add(2, 3))`` instead of ``push1 3 push1 2 add push1 1 mul`` +* assembly-local variables: ``let x := add(2, 3) let y := mload(0x40) x := add(x, y)`` +* access to external variables: ``function f(uint x) { assembly { x := sub(x, 1) } }`` +* labels: ``let x := 10 repeat: x := sub(x, 1) jumpi(repeat, eq(x, 0))`` +* loops: ``for { let i := 0 } lt(i, x) { i := add(i, 1) } { y := mul(2, y) }`` +* switch statements: ``switch x case 0: { y := mul(x, 2) } default: { y := 0 }`` +* function calls: ``function f(x) -> (y) { switch x case 0: { y := 1 } default: { y := mul(x, f(sub(x, 1))) } }`` + +.. note:: + Of the above, loops, function calls and switch statements are not yet implemented. + +We now want to describe the inline assembly language in detail. + +.. warning:: + Inline assembly is still a relatively new feature and might change if it does not prove useful, + so please try to keep up to date. + +Example +------- + +The following example provides library code to access the code of another contract and +load it into a ``bytes`` variable. This is not possible at all with "plain Solidity" and the +idea is that assembly libraries will be used to enhance the language in such ways. + +.. code:: + + library GetCode { + function at(address _addr) returns (bytes o_code) { + assembly { + // retrieve the size of the code, this needs assembly + let size := extcodesize(_addr) + // allocate output byte array - this could also be done without assembly + // by using o_code = new bytes(size) + o_code := mload(0x40) + // new "memory end" including padding + mstore(0x40, add(o_code, and(add(add(size, 0x20), 0x1f), not(0x1f)))) + // store length in memory + mstore(o_code, size) + // actually retrieve the code, this needs assembly + extcodecopy(_addr, add(o_code, 0x20), 0, size) + } + } + } + +Inline assembly could also be beneficial in cases where the optimizer fails to produce +efficient code. Please be aware that assembly is much more difficult to write because +the compiler does not perform checks, so you should use it only if +you really know what you are doing. + +.. code:: + + library VectorSum { + // This function is less efficient because the optimizer currently fails to + // remove the bounds checks in array access. + function sumSolidity(uint[] _data) returns (uint o_sum) { + for (uint i = 0; i < _data.length; ++i) + o_sum += _data[i]; + } + + // We know that we only access the array in bounds, so we can avoid the check. + // 0x20 needs to be added to an array because the first slot contains the + // array length. + function sumAsm(uint[] _data) returns (uint o_sum) { + for (uint i = 0; i < _data.length; ++i) { + assembly { + o_sum := mload(add(add(_data, 0x20), mul(i, 0x20))) + } + } + } + } + + +Syntax +------ + +Assembly parses comments, literals and identifiers exactly as Solidity, so you can use the +usual ``//`` and ``/* */`` comments. Inline assembly is marked by ``assembly { ... }`` and inside +these curly braces, the following can be used (see the later sections for more details) + + - literals, i.e. ``0x123``, ``42`` or ``"abc"`` (strings up to 32 characters) + - opcodes (in "instruction style"), e.g. ``mload sload dup1 sstore``, for a list see below + - opcode in functional style, e.g. ``add(1, mlod(0))`` + - labels, e.g. ``name:`` + - variable declarations, e.g. ``let x := 7`` or ``let x := add(y, 3)`` + - identifiers (labels or assembly-local variables and externals if used as inline assembly), e.g. ``jump(name)``, ``3 x add`` + - assignments (in "instruction style"), e.g. ``3 =: x`` + - assignments in functional style, e.g. ``x := add(y, 3)`` + - blocks where local variables are scoped inside, e.g. ``{ let x := 3 { let y := add(x, 1) } }`` + +Opcodes +------- + +This document does not want to be a full description of the Ethereum virtual machine, but the +following list can be used as a reference of its opcodes. + +If an opcode takes arguments (always from the top of the stack), they are given in parentheses. +Note that the order of arguments can be seed to be reversed in non-functional style (explained below). +Opcodes marked with ``-`` do not push an item onto the stack, those marked with ``*`` are +special and all others push exactly one item onte the stack. + +In the following, ``mem[a...b)`` signifies the bytes of memory starting at position ``a`` up to +(excluding) position ``b`` and ``storage[p]`` signifies the storage contents at position ``p``. + +The opcodes ``pushi`` and ``jumpdest`` cannot be used directly. + +In the grammar, opcodes are represented as pre-defined identifiers. + ++-------------------------+------+-----------------------------------------------------------------+ +| stop + `-` | stop execution, identical to return(0,0) | ++-------------------------+------+-----------------------------------------------------------------+ +| add(x, y) | | x + y | ++-------------------------+------+-----------------------------------------------------------------+ +| sub(x, y) | | x - y | ++-------------------------+------+-----------------------------------------------------------------+ +| mul(x, y) | | x * y | ++-------------------------+------+-----------------------------------------------------------------+ +| div(x, y) | | x / y | ++-------------------------+------+-----------------------------------------------------------------+ +| sdiv(x, y) | | x / y, for signed numbers in two's complement | ++-------------------------+------+-----------------------------------------------------------------+ +| mod(x, y) | | x % y | ++-------------------------+------+-----------------------------------------------------------------+ +| smod(x, y) | | x % y, for signed numbers in two's complement | ++-------------------------+------+-----------------------------------------------------------------+ +| exp(x, y) | | x to the power of y | ++-------------------------+------+-----------------------------------------------------------------+ +| not(x) | | ~x, every bit of x is negated | ++-------------------------+------+-----------------------------------------------------------------+ +| lt(x, y) | | 1 if x < y, 0 otherwise | ++-------------------------+------+-----------------------------------------------------------------+ +| gt(x, y) | | 1 if x > y, 0 otherwise | ++-------------------------+------+-----------------------------------------------------------------+ +| slt(x, y) | | 1 if x < y, 0 otherwise, for signed numbers in two's complement | ++-------------------------+------+-----------------------------------------------------------------+ +| sgt(x, y) | | 1 if x > y, 0 otherwise, for signed numbers in two's complement | ++-------------------------+------+-----------------------------------------------------------------+ +| eq(x, y) | | 1 if x == y, 0 otherwise | ++-------------------------+------+-----------------------------------------------------------------+ +| iszero(x) | | 1 if x == 0, 0 otherwise | ++-------------------------+------+-----------------------------------------------------------------+ +| and(x, y) | | bitwise and of x and y | ++-------------------------+------+-----------------------------------------------------------------+ +| or(x, y) | | bitwise or of x and y | ++-------------------------+------+-----------------------------------------------------------------+ +| xor(x, y) | | bitwise xor of x and y | ++-------------------------+------+-----------------------------------------------------------------+ +| byte(n, x) | | nth byte of x, where the most significant byte is the 0th byte | ++-------------------------+------+-----------------------------------------------------------------+ +| addmod(x, y, m) | | (x + y) % m with arbitrary precision arithmetics | ++-------------------------+------+-----------------------------------------------------------------+ +| mulmod(x, y, m) | | (x * y) % m with arbitrary precision arithmetics | ++-------------------------+------+-----------------------------------------------------------------+ +| signextend(i, x) | | sign extend from (i*8+7)th bit counting from least significant | ++-------------------------+------+-----------------------------------------------------------------+ +| sha3(p, n) | | keccak(mem[p...(p+n))) | ++-------------------------+------+-----------------------------------------------------------------+ +| jump(label) | `-` | jump to label / code position | ++-------------------------+------+-----------------------------------------------------------------+ +| jumpi(label, cond) | `-` | jump to label if cond is nonzero | ++-------------------------+------+-----------------------------------------------------------------+ +| pc | | current position in code | ++-------------------------+------+-----------------------------------------------------------------+ +| pop | `*` | remove topmost stack slot | ++-------------------------+------+-----------------------------------------------------------------+ +| dup1 ... dup16 | | copy ith stack slot to the top (counting from top) | ++-------------------------+------+-----------------------------------------------------------------+ +| swap1 ... swap16 | `*` | swap topmost and ith stack slot below it | ++-------------------------+------+-----------------------------------------------------------------+ +| mload(p) | | mem[p..(p+32)) | ++-------------------------+------+-----------------------------------------------------------------+ +| mstore(p, v) | `-` | mem[p..(p+32)) := v | ++-------------------------+------+-----------------------------------------------------------------+ +| mstore8(p, v) | `-` | mem[p] := v & 0xff - only modifies a single byte | ++-------------------------+------+-----------------------------------------------------------------+ +| sload(p) | | storage[p] | ++-------------------------+------+-----------------------------------------------------------------+ +| sstore(p, v) | `-` | storage[p] := v | ++-------------------------+------+-----------------------------------------------------------------+ +| msize | | size of memory, i.e. largest accessed memory index | ++-------------------------+------+-----------------------------------------------------------------+ +| gas | | gas still available to execution | ++-------------------------+------+-----------------------------------------------------------------+ +| address | | address of the current contract / execution context | ++-------------------------+------+-----------------------------------------------------------------+ +| balance(a) | | wei balance at address a | ++-------------------------+------+-----------------------------------------------------------------+ +| caller | | call sender (excluding delegatecall) | ++-------------------------+------+-----------------------------------------------------------------+ +| callvalue | | wei sent together with the current call | ++-------------------------+------+-----------------------------------------------------------------+ +| calldataload(p) | | call data starting from position p (32 bytes) | ++-------------------------+------+-----------------------------------------------------------------+ +| calldatasize | | size of call data in bytes | ++-------------------------+------+-----------------------------------------------------------------+ +| calldatacopy(t, f, s) | `-` | copy s bytes from calldata at position f to mem at position t | ++-------------------------+------+-----------------------------------------------------------------+ +| codesize | | size of the code of the current contract / execution context | ++-------------------------+------+-----------------------------------------------------------------+ +| codecopy(t, f, s) | `-` | copy s bytes from code at position f to mem at position t | ++-------------------------+------+-----------------------------------------------------------------+ +| extcodesize(a) | | size of the code at address a | ++-------------------------+------+-----------------------------------------------------------------+ +| extcodecopy(a, t, f, s) | `-` | like codecopy(t, f, s) but take code at address a | ++-------------------------+------+-----------------------------------------------------------------+ +| create(v, p, s) | | create new contract with code mem[p..(p+s)) and send v wei | +| | | and return the new address | ++-------------------------+------+-----------------------------------------------------------------+ +| call(g, a, v, in, | | call contract at address a with input mem[in..(in+insize)] | +| insize, out, outsize) | | providing g gas and v wei and output area | +| | | mem[out..(out+outsize)] returting 1 on error (out of gas) | ++-------------------------+------+-----------------------------------------------------------------+ +| callcode(g, a, v, in, | | identical to call but only use the code from a and stay | +| insize, out, outsize) | | in the context of the current contract otherwise | ++-------------------------+------+-----------------------------------------------------------------+ +| delegatecall(g, a, in, | | identical to callcode but also keep ``caller`` | +| insize, out, outsize) | | and ``callvalue`` | ++-------------------------+------+-----------------------------------------------------------------+ +| return(p, s) | `*` | end execution, return data mem[p..(p+s)) | ++-------------------------+------+-----------------------------------------------------------------+ +| selfdestruct(a) | `*` | end execution, destroy current contract and send funds to a | ++-------------------------+------+-----------------------------------------------------------------+ +| log0(p, s) | `-` | log without topics and data mem[p..(p+s)) | ++-------------------------+------+-----------------------------------------------------------------+ +| log1(p, s, t1) | `-` | log with topic t1 and data mem[p..(p+s)) | ++-------------------------+------+-----------------------------------------------------------------+ +| log2(p, s, t1, t2) | `-` | log with topics t1, t2 and data mem[p..(p+s)) | ++-------------------------+------+-----------------------------------------------------------------+ +| log3(p, s, t1, t2, t3) | `-` | log with topics t1, t2, t3 and data mem[p..(p+s)) | ++-------------------------+------+-----------------------------------------------------------------+ +| log4(p, s, t1, t2, t3, | `-` | log with topics t1, t2, t3, t4 and data mem[p..(p+s)) | +| t4) | | | ++-------------------------+------+-----------------------------------------------------------------+ +| origin | | transaction sender | ++-------------------------+------+-----------------------------------------------------------------+ +| gasprice | | gas price of the transaction | ++-------------------------+------+-----------------------------------------------------------------+ +| blockhash(b) | | hash of block nr b - only for last 256 blocks excluding current | ++-------------------------+------+-----------------------------------------------------------------+ +| coinbase | | current mining beneficiary | ++-------------------------+------+-----------------------------------------------------------------+ +| timestamp | | timestamp of the current block in seconds since the epoch | ++-------------------------+------+-----------------------------------------------------------------+ +| number | | current block number | ++-------------------------+------+-----------------------------------------------------------------+ +| difficulty | | difficulty of the current block | ++-------------------------+------+-----------------------------------------------------------------+ +| gaslimit | | block gas limit of the current block | ++-------------------------+------+-----------------------------------------------------------------+ + +Literals +-------- + +You can use integer constants by typing them in decimal or hexadecimal notation and an +appropriate ``PUSHi`` instruction will automatically be generated. The following creates code +to add 2 and 3 resulting in 5 and then computes the bitwise and with the string "abc". +Strings are stored left-aligned and cannot be longer than 32 bytes. + +.. code:: + + assembly { 2 3 add "abc" and } + +Functional Style +----------------- + +You can type opcode after opcode in the same way they will end up in bytecode. For example +adding ``3`` to the contents in memory at position ``0x80`` would be + +.. code:: + + 3 0x80 mload add 0x80 mstore + +As it is often hard to see what the actual arguments for certain opcodes are, +Solidity inline assembly also provides a "functional style" notation where the same code +would be written as follows + +.. code:: + + mstore(0x80, add(mload(0x80), 3)) + +Functional style and instructional style can be mixed, but any opcode inside a +functional style expression has to return exactly one stack slot (most of the opcodes do). + +Note that the order of arguments is reversed in functional-style as opposed to the instruction-style +way. If you use functional-style, the first argument will end up on the stack top. + + +Access to External Variables and Functions +------------------------------------------ + +Solidity variables and other identifiers can be accessed by simply using their name. +For storage and memory variables, this will push the address and not the value onto the +stack. Also note that non-struct and non-array storage variable addresses occupy two slots +on the stack: One for the address and one for the byte offset inside the storage slot. +In assignments (see below), we can even use local Solidity variables to assign to. + +Functions external to inline assembly can also be accessed: The assembly will +push their entry label (with virtual function resolution applied). The calling semantics +in solidity are: + + - the caller pushes return label, arg1, arg2, ..., argn + - the call returns with ret1, ret2, ..., retn + +This feature is still a bit cumbersome to use, because the stack offset essentially +changes during the call, and thus references to local variables will be wrong. +It is planned that the stack height changes can be specified in inline assembly. + +.. code:: + + contract C { + uint b; + function f(uint x) returns (uint r) { + assembly { + b pop // remove the offset, we know it is zero + sload + x + mul + =: r // assign to return variable r + } + } + } + +Labels +------ + +Another problem in EVM assembly is that ``jump`` and ``jumpi`` use absolute addresses +which can change easily. Solidity inline assembly provides labels to make the use of +jumps easier. The following code computes an element in the Fibonacci series. + +.. code:: + + { + let n := calldataload(4) + let a := 1 + let b := a + loop: + jumpi(loopend, eq(n, 0)) + a add swap1 + n := sub(n, 1) + jump(loop) + loopend: + mstore(0, a) + return(0, 0x20) + } + +Please note that automatically accessing stack variables can only work if the +assembler knows the current stack height. This fails to work if the jump source +and target have different stack heights. It is still fine to use such jumps, but +you should just not access any stack variables (even assembly variables) in that case. + +Furthermore, the stack height analyser goes through the code opcode by opcode +(and not according to control flow), so in the following case, the assembler +will have a wrong impression about the stack height at label ``two``: + +.. code:: + + { + jump(two) + one: + // Here the stack height is 1 (because we pushed 7), + // but the assembler thinks it is 0 because it reads + // from top to bottom. + // Accessing stack variables here will lead to errors. + jump(three) + two: + 7 // push something onto the stack + jump(one) + three: + } + + +Declaring Assembly-Local Variables +---------------------------------- + +You can use the ``let`` keyword to declare variables that are only visible in +inline assembly and actually only in the current ``{...}``-block. What happens +is that the ``let`` instruction will create a new stack slot that is reserved +for the variable and automatically removed again when the end of the block +is reached. You need to provide an initial value for the variable which can +be just ``0``, but it can also be a complex functional-style expression. + +.. code:: + + contract C { + function f(uint x) returns (uint b) { + assembly { + let v := add(x, 1) + mstore(0x80, v) + { + let y := add(sload(v), 1) + b := y + } // y is "deallocated" here + b := add(b, v) + } // v is "deallocated" here + } + } + + +Assignments +----------- + +Assignments are possible to assembly-local variables and to function-local +variables. Take care that when you assign to variables that point to +memory or storage, you will only change the pointer and not the data. + +There are two kinds of assignments: functional-style and instruction-style. +For functional-style assignments (``variable := value``), you need to provide a value in a +functional-style expression that results in exactly one stack value +and for instruction-style (``=: variable``), the value is just taken from the stack top. +For both ways, the colon points to the name of the variable. The assignment +is performed by replacing the variable's value on the stack by the new value. + +.. code:: + + assembly { + let v := 0 // functional-style assignment as part of variable declaration + let g := add(v, 2) + sload(10) + =: v // instruction style assignment, puts the result of sload(10) into v + } + +Switch +------ + +You can use a switch statement as a very basic version of "if/else". +It takes the value of an expression and compares it to several constants. +The branch corresponding to the matching constant is taken. Contrary to the +error-prone behaviour of some programming languages, control flow does +not continue from one case to the next. There can be a fallback or default +case called ``default``. + +.. code:: + + assembly { + let x := 0 + switch calldataload(4) + case 0: { + x := calldataload(0x24) + } + default: { + x := calldataload(0x44) + } + sstore(0, div(x, 2)) + } + +The list of cases does not require curly braces, but the body of a +case does require them. + +Loops +----- + +Assembly supports a simple for-style loop. For-style loops have +a header containing an initializing part, a condition and a post-iteration +part. The condition has to be a functional-style expression, while +the other two can also be blocks. If the initializing part is a block that +declares any variables, the scope of these variables is extended into the +body (including the condition and the post-iteration part). + +The following example computes the sum of an area in memory. + +.. code:: + + assembly { + let x := 0 + for { let i := 0 } lt(i, 0x100) { i := add(i, 0x20) } { + x := add(x, mload(i)) + } + } + +Functions +--------- + +Assembly allows the definition of low-level functions. These take their +arguments (and a return PC) from the stack and also put the results onto the +stack. Calling a function looks the same way as executing a functional-style +opcode. + +Functions can be defined anywhere and are visible in the block they are +declared in. Inside a function, you cannot access local variables +defined outside of that function. There is no explicit ``return`` +statement. + +If you call a function that returns multiple values, you have to assign +them to a tuple using ``(a, b) := f(x)`` or ``let (a, b) := f(x)``. + +The following example implements the power function by square-and-multiply. + +.. code:: + + assembly { + function power(base, exponent) -> (result) { + switch exponent + 0: { result := 1 } + 1: { result := base } + default: { + result := power(mul(base, base), div(exponent, 2)) + switch mod(exponent, 2) + 1: { result := mul(base, result) } + } + } + } + +Things to Avoid +--------------- + +Inline assembly might have a quite high-level look, but it actually is extremely +low-level. Function calls, loops and switches are converted by simple +rewriting rules and after that, the only thing the assembler does for you is re-arranging +functional-style opcodes, managing jump labels, counting stack height for +variable access and removing stack slots for assembly-local variables when the end +of their block is reached. Especially for those two last cases, it is important +to know that the assembler only counts stack height from top to bottom, not +necessarily following control flow. Furthermore, operations like swap will only +swap the contents of the stack but not the location of variables. + +Conventions in Solidity +----------------------- + +In contrast to EVM assembly, Solidity knows types which are narrower than 256 bits, +e.g. ``uint24``. In order to make them more efficient, most arithmetic operations just +treat them as 256 bit numbers and the higher-order bits are only cleaned at the +point where it is necessary, i.e. just shortly before they are written to memory +or before comparisons are performed. This means that if you access such a variable +from within inline assembly, you might have to manually clean the higher order bits +first. + +Solidity manages memory in a very simple way: There is a "free memory pointer" +at position ``0x40`` in memory. If you want to allocate memory, just use the memory +from that point on and update the pointer accordingly. + +Elements in memory arrays in Solidity always occupy multiples of 32 bytes (yes, this is +even true for ``byte[]``, but not for ``bytes`` and ``string``). Multi-dimensional memory +arrays are pointers to memory arrays. The length of a dynamic array is stored at the +first slot of the array and then only the array elements follow. + +.. warning:: + Statically-sized memory arrays do not have a length field, but it will be added soon + to allow better convertibility between statically- and dynamically-sized arrays, so + please do not rely on that. + + +Standalone Assembly +=================== + +The assembly language described as inline assembly above can also be used +standalone and in fact, the plan is to use it as an intermediate language +for the Solidity compiler. In this form, it tries to achieve several goals: + +1. Programs written in it should be readable, even if the code is generated by a compiler from Solidity. +2. The translation from assembly to bytecode should contain as few "surprises" as possible. +3. Control flow should be easy to detect to help in formal verification and optimization. + +In order to achieve the first and last goal, assembly provides high-level constructs +like ``for`` loops, ``switch`` statements and function calls. It should be possible +to write assembly programs that do not make use of explicit ``SWAP``, ``DUP``, +``JUMP`` and ``JUMPI`` statements, because the first two obfuscate the data flow +and the last two obfuscate control flow. Furthermore, functional statements of +the form ``mul(add(x, y), 7)`` are preferred over pure opcode statements like +``7 y x add mul`` because in the first form, it is much easier to see which +operand is used for which opcode. + +The second goal is achieved by introducing a desugaring phase that only removes +the higher level constructs in a very regular way and still allows inspecting +the generated low-level assembly code. The only non-local operation performed +by the assembler is name lookup of user-defined identifiers (functions, variables, ...), +which follow very simple and regular scoping rules and cleanup of local variables from the stack. + +Scoping: An identifier that is declared (label, variable, function, assembly) +is only visible in the block where it was declared (including nested blocks +inside the current block). It is not legal to access local variables across +function borders, even if they would be in scope. Shadowing is allowed, but +two identifiers with the same name cannot be declared in the same block. +Local variables cannot be accessed before they were declared, but labels, +functions and assemblies can. Assemblies are special blocks that are used +for e.g. returning runtime code or creating contracts. No identifier from an +outer assembly is visible in a sub-assembly. + +If control flow passes over the end of a block, pop instructions are inserted +that match the number of local variables declared in that block, unless the +``}`` is directly preceded by an opcode that does not have a continuing control +flow path. Whenever a local variable is referenced, the code generator needs +to know its current relative position in the stack and thus it needs to +keep track of the current so-called stack height. +At the end of a block, this implicit stack height is always reduced by the number +of local variables whether ther is a continuing control flow or not. + +This means that the stack height before and after the block should be the same. +If this is not the case, a warning is issued, +unless the last instruction in the block did not have a continuing control flow path. + +Why do we use higher-level constructs like ``switch``, ``for`` and functions: + +Using ``switch``, ``for`` and functions, it should be possible to write +complex code without using ``jump`` or ``jumpi`` manually. This makes it much +easier to analyze the control flow, which allows for improved formal +verification and optimization. + +Furthermore, if manual jumps are allowed, computing the stack height is rather complicated. +The position of all local variables on the stack needs to be known, otherwise +neither references to local variables nor removing local variables automatically +from the stack at the end of a block will work properly. Because of that, +every label that is preceded by an instruction that ends or diverts control flow +should be annotated with the current stack layout. This annotation is performed +automatically during the desugaring phase. + +Example: + +We will follow an example compilation from Solidity to desugared assembly. +We consider the runtime bytecode of the following Solidity program:: + + contract C { + function f(uint x) returns (uint y) { + y = 1; + for (uint i = 0; i < x; i++) + y = 2 * y; + } + } + +The following assembly will be generated:: + + { + mstore(0x40, 0x60) // store the "free memory pointer" + // function dispatcher + switch div(calldataload(0), exp(2, 226)) + case 0xb3de648b: { + let (r) = f(calldataload(4)) + let ret := $allocate(0x20) + mstore(ret, r) + return(ret, 0x20) + } + default: { jump(invalidJumpLabel) } + // memory allocator + function $allocate(size) -> (pos) { + pos := mload(0x40) + mstore(0x40, add(pos, size)) + } + // the contract function + function f(x) -> (y) { + y := 1 + for { let i := 0 } lt(i, x) { i := add(i, 1) } { + y := mul(2, y) + } + } + } + +After the desugaring phase it looks as follows:: + + { + mstore(0x40, 0x60) + { + let $0 := div(calldataload(0), exp(2, 226)) + jumpi($case1, eq($0, 0xb3de648b)) + jump($caseDefault) + $case1: + { + // the function call - we put return label and arguments on the stack + $ret1 calldataload(4) jump($fun_f) + $ret1 [r]: // a label with a [...]-annotation resets the stack height + // to "current block + number of local variables". It also + // introduces a variable, r: + // r is at top of stack, $0 is below (from enclosing block) + $ret2 0x20 jump($fun_allocate) + $ret2 [ret]: // stack here: $0, r, ret (top) + mstore(ret, r) + return(ret, 0x20) + // although it is useless, the jump is automatically inserted, + // since the desugaring process does not analyze control-flow + jump($endswitch) + } + $caseDefault: + { + jump(invalidJumpLabel) + jump($endswitch) + } + $endswitch: + } + jump($afterFunction) + $fun_allocate: + { + $start[$retpos, size]: + // output variables live in the same scope as the arguments. + let pos := 0 + { + pos := mload(0x40) + mstore(0x40, add(pos, size)) + } + swap1 pop swap1 jump + } + $fun_f: + { + start [$retpos, x]: + let y := 0 + { + let i := 0 + $for_begin: + jumpi($for_end, iszero(lt(i, x))) + { + y := mul(2, y) + } + $for_continue: + { i := add(i, 1) } + jump($for_begin) + $for_end: + } // Here, a pop instruction is inserted for i + swap1 pop swap1 jump + } + $afterFunction: + stop + } + + +Assembly happens in four stages: + +1. Parsing +2. Desugaring (removes switch, for and functions) +3. Opcode stream generation +4. Bytecode generation + +We will specify steps one to three in a pseudo-formal way. More formal +specifications will follow. + + +Parsing / Grammar +----------------- + +The tasks of the parser are the following: + +- Turn the byte stream into a token stream, discarding C++-style comments + (a special comment exists for source references, but we will not explain it here). +- Turn the token stream into an AST according to the grammar below +- Register identifiers with the block they are defined in (annotation to the + AST node) and note from which point on, variables can be accessed. + +The assembly lexer follows the one defined by Solidity itself. + +Whitespace is used to delimit tokens and it consists of the characters +Space, Tab and Linefeed. Comments are regular JavaScript/C++ comments and +are interpreted in the same way as Whitespace. + +Grammar:: + + AssemblyBlock = '{' AssemblyItem* '}' + AssemblyItem = + Identifier | + AssemblyBlock | + FunctionalAssemblyExpression | + AssemblyLocalDefinition | + FunctionalAssemblyAssignment | + AssemblyAssignment | + LabelDefinition | + AssemblySwitch | + AssemblyFunctionDefinition | + AssemblyFor | + 'break' | 'continue' | + SubAssembly | 'dataSize' '(' Identifier ')' | + LinkerSymbol | + 'errorLabel' | 'bytecodeSize' | + NumberLiteral | StringLiteral | HexLiteral + Identifier = [a-zA-Z_$] [a-zA-Z_0-9]* + FunctionalAssemblyExpression = Identifier '(' ( AssemblyItem ( ',' AssemblyItem )* )? ')' + AssemblyLocalDefinition = 'let' IdentifierOrList ':=' FunctionalAssemblyExpression + FunctionalAssemblyAssignment = IdentifierOrList ':=' FunctionalAssemblyExpression + IdentifierOrList = Identifier | '(' IdentifierList ')' + IdentifierList = Identifier ( ',' Identifier)* + AssemblyAssignment = '=:' Identifier + LabelDefinition = Identifier ( '[' ( IdentifierList | NumberLiteral ) ']' )? ':' + AssemblySwitch = 'switch' FunctionalAssemblyExpression AssemblyCase* + ( 'default' ':' AssemblyBlock )? + AssemblyCase = 'case' FunctionalAssemblyExpression ':' AssemblyBlock + AssemblyFunctionDefinition = 'function' Identifier '(' IdentifierList? ')' + ( '->' '(' IdentifierList ')' )? AssemblyBlock + AssemblyFor = 'for' ( AssemblyBlock | FunctionalAssemblyExpression) + FunctionalAssemblyExpression ( AssemblyBlock | FunctionalAssemblyExpression) AssemblyBlock + SubAssembly = 'assembly' Identifier AssemblyBlock + LinkerSymbol = 'linkerSymbol' '(' StringLiteral ')' + NumberLiteral = HexNumber | DecimalNumber + HexLiteral = 'hex' ('"' ([0-9a-fA-F]{2})* '"' | '\'' ([0-9a-fA-F]{2})* '\'') + StringLiteral = '"' ([^"\r\n\\] | '\\' .)* '"' + HexNumber = '0x' [0-9a-fA-F]+ + DecimalNumber = [0-9]+ + + +Desugaring +---------- + +An AST transformation removes for, switch and function constructs. The result +is still parseable by the same parser, but it will not use certain constructs. +If jumpdests are added that are only jumped to and not continued at, information +about the stack content is added, unless no local variables of outer scopes are +accessed or the stack height is the same as for the previous instruction. + +Pseudocode:: + + desugar item: AST -> AST = + match item { + AssemblyFunctionDefinition('function' name '(' arg1, ..., argn ')' '->' ( '(' ret1, ..., retm ')' body) -> + <name>: + { + $<name>_start [$retPC, $argn, ..., arg1]: + let ret1 := 0 ... let retm := 0 + { desugar(body) } + swap and pop items so that only ret1, ... retn, $retPC are left on the stack + jump + } + AssemblyFor('for' { init } condition post body) -> + { + init // cannot be its own block because we want variable scope to extend into the body + // find I such that there are no labels $forI_* + $forI_begin: + jumpi($forI_end, iszero(condition)) + { body } + $forI_continue: + { post } + jump($forI_begin) + $forI_end: + } + 'break' -> + { + // find nearest enclosing scope with label $forI_end + pop all local variables that are defined at the current point + but not at $forI_end + jump($forI_end) + } + 'continue' -> + { + // find nearest enclosing scope with label $forI_continue + pop all local variables that are defined at the current point + but not at $forI_continue + jump($forI_continue) + } + AssemblySwitch(switch condition cases ( default: defaultBlock )? ) -> + { + // find I such that there is no $switchI* label or variable + let $switchI_value := condition + for each of cases match { + case val: -> jumpi($switchI_caseJ, eq($switchI_value, val)) + } + if default block present: -> + { defaultBlock jump($switchI_end) } + for each of cases match { + case val: { body } -> $switchI_caseJ: { body jump($switchI_end) } + } + $switchI_end: + } + FunctionalAssemblyExpression( identifier(arg1, arg2, ..., argn) ) -> + { + if identifier is function <name> with n args and m ret values -> + { + // find I such that $funcallI_* does not exist + $funcallI_return argn ... arg2 arg1 jump(<name>) + if the current context is `let (id1, ..., idm) := f(...)` -> + $funcallI_return [id1, ..., idm]: + else -> + $funcallI_return[m - n - 1]: + turn the functional expression that leads to the function call + into a statement stream + } + else -> desugar(children of node) + } + default node -> + desugar(children of node) + } + +Opcode Stream Generation +------------------------ + +During opcode stream generation, we keep track of the current stack height, +so that accessing stack variables by name is possible. + +Pseudocode:: + + codegen item: AST -> opcode_stream = + match item { + AssemblyBlock({ items }) -> + join(codegen(item) for item in items) + if last generated opcode has continuing control flow: + POP for all local variables registered at the block (including variables + introduced by labels) + warn if the stack height at this point is not the same as at the start of the block + Identifier(id) -> + lookup id in the syntactic stack of blocks + match type of id + Local Variable -> + DUPi where i = 1 + stack_height - stack_height_of_identifier(id) + Label -> + // reference to be resolved during bytecode generation + PUSH<bytecode position of label> + SubAssembly -> + PUSH<bytecode position of subassembly data> + FunctionalAssemblyExpression(id ( arguments ) ) -> + join(codegen(arg) for arg in arguments.reversed()) + id (which has to be an opcode, might be a function name later) + AssemblyLocalDefinition(let (id1, ..., idn) := expr) -> + register identifiers id1, ..., idn as locals in current block at current stack height + codegen(expr) - assert that expr returns n items to the stack + FunctionalAssemblyAssignment((id1, ..., idn) := expr) -> + lookup id1, ..., idn in the syntactic stack of blocks, assert that they are variables + codegen(expr) + for j = n, ..., i: + SWAPi where i = 1 + stack_height - stack_height_of_identifier(idj) + POP + AssemblyAssignment(=: id) -> + look up id in the syntactic stack of blocks, assert that it is a variable + SWAPi where i = 1 + stack_height - stack_height_of_identifier(id) + POP + LabelDefinition(name [id1, ..., idn] :) -> + JUMPDEST + // register new variables id1, ..., idn and set the stack height to + // stack_height_at_block_start + number_of_local_variables + LabelDefinition(name [number] :) -> + JUMPDEST + // adjust stack height by +number (can be negative) + NumberLiteral(num) -> + PUSH<num interpreted as decimal and right-aligned> + HexLiteral(lit) -> + PUSH32<lit interpreted as hex and left-aligned> + StringLiteral(lit) -> + PUSH32<lit utf-8 encoded and left-aligned> + SubAssembly(assembly <name> block) -> + append codegen(block) at the end of the code + dataSize(<name>) -> + assert that <name> is a subassembly -> + PUSH32<size of code generated from subassembly <name>> + linkerSymbol(<lit>) -> + PUSH32<zeros> and append position to linker table + } diff --git a/docs/conf.py b/docs/conf.py index 2bc79fd9..ecabbb86 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -56,9 +56,9 @@ copyright = '2016, Ethereum' # built documents. # # The short X.Y version. -version = '0.4.7' +version = '0.4.8' # The full version, including alpha/beta/rc tags. -release = '0.4.7-develop' +release = '0.4.8-develop' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/contracts.rst b/docs/contracts.rst index e82b7495..edc42c3d 100644 --- a/docs/contracts.rst +++ b/docs/contracts.rst @@ -877,6 +877,13 @@ cannot be resolved. A simple rule to remember is to specify the base classes in the order from "most base-like" to "most derived". +Inheriting Different Kinds of Members of the Same Name +====================================================== + +When the inheritance results in a contract with a function and a modifier of the same name, it is considered as an error. +This error is produced also by an event and a modifier of the same name, and a function and an event of the same name. +As an exception, a state variable accessor can override a public function. + .. index:: ! contract;abstract, ! abstract contract ****************** diff --git a/docs/control-structures.rst b/docs/control-structures.rst index 0802c085..6c0b0f27 100644 --- a/docs/control-structures.rst +++ b/docs/control-structures.rst @@ -207,8 +207,8 @@ Creating Contracts via ``new`` ============================== A contract can create a new contract using the ``new`` keyword. The full -code of the contract being created has to be known and, thus, recursive -creation-dependencies are now possible. +code of the contract being created has to be known in advance, so recursive +creation-dependencies are not possible. :: diff --git a/docs/grammar.txt b/docs/grammar.txt index d15fbaf7..62b4a021 100644 --- a/docs/grammar.txt +++ b/docs/grammar.txt @@ -14,7 +14,7 @@ ContractDefinition = ( 'contract' | 'library' ) Identifier ContractPart = StateVariableDeclaration | UsingForDeclaration | StructDefinition | ModifierDefinition | FunctionDefinition | EventDefinition | EnumDefinition -InheritanceSpecifier = Identifier ( '(' Expression ( ',' Expression )* ')' )? +InheritanceSpecifier = UserDefinedTypeName ( '(' Expression ( ',' Expression )* ')' )? StateVariableDeclaration = TypeName ( 'public' | 'internal' | 'private' )? Identifier ('=' Expression)? ';' UsingForDeclaration = 'using' Identifier 'for' ('*' | TypeName) ';' @@ -23,7 +23,7 @@ StructDefinition = 'struct' Identifier '{' ModifierDefinition = 'modifier' Identifier ParameterList? Block FunctionDefinition = 'function' Identifier? ParameterList ( FunctionCall | Identifier | 'constant' | 'payable' | 'external' | 'public' | 'internal' | 'private' )* - ( 'returns' ParameterList )? Block + ( 'returns' ParameterList )? ( ';' |Â Block ) EventDefinition = 'event' Identifier IndexedParameterList 'anonymous'? ';' EnumValue = Identifier @@ -35,10 +35,18 @@ TypeNameList = '(' ( TypeName (',' TypeName )* )? ')' // semantic restriction: mappings and structs (recursively) containing mappings // are not allowed in argument lists -VariableDeclaration = TypeName Identifier -TypeName = ElementaryTypeName | Identifier StorageLocation? | Mapping | ArrayTypeName | FunctionTypeName +VariableDeclaration = TypeName StorageLocation? Identifier + +TypeName = ElementaryTypeName + | UserDefinedTypeName + | Mapping + | ArrayTypeName + | FunctionTypeName + +UserDefinedTypeName = Identifier ( '.' Identifier )* + Mapping = 'mapping' '(' ElementaryTypeName '=>' TypeName ')' -ArrayTypeName = TypeName StorageLocation? '[' Expression? ']' +ArrayTypeName = TypeName '[' Expression? ']' FunctionTypeName = 'function' TypeNameList ( 'internal' | 'external' | 'constant' | 'payable' )* ( 'returns' TypeNameList )? StorageLocation = 'memory' | 'storage' @@ -69,7 +77,7 @@ Expression = | Expression '**' Expression | Expression ('*' | '/' | '%') Expression | Expression ('+' | '-') Expression - | Expression ('<<' | '>>') + | Expression ('<<' | '>>') Expression | Expression '&' Expression | Expression '^' Expression | Expression '|' Expression @@ -82,21 +90,37 @@ Expression = | Expression? (',' Expression) | PrimaryExpression -PrimaryExpression = Identifier | BooleanLiteral | NumberLiteral | HexLiteral | StringLiteral +PrimaryExpression = Identifier + | BooleanLiteral + | NumberLiteral + | HexLiteral + | StringLiteral + | ElementaryTypeNameExpression + +ExpressionList = Expression ( ',' Expression )* +NameValueList = Identifier ':' Expression ( ',' Identifier ':' Expression )* -FunctionCall = ( PrimaryExpression | NewExpression | TypeName ) ( ( '.' Identifier ) | ( '[' Expression ']' ) )* '(' Expression? ( ',' Expression )* ')' -NewExpression = 'new' Identifier +FunctionCall = ( PrimaryExpression | NewExpression | TypeName ) ( ( '.' Identifier ) | ( '[' Expression ']' ) )* '(' FunctionCallArguments ')' +FunctionCallArguments = '{' NameValueList? '}' + | ExpressionList? + +NewExpression = 'new' TypeName MemberAccess = Expression '.' Identifier IndexAccess = Expression '[' Expression? ']' BooleanLiteral = 'true' | 'false' -NumberLiteral = '0x'? [0-9]+ (' ' NumberUnit)? +NumberLiteral = ( HexNumber | DecimalNumber ) (' ' NumberUnit)? NumberUnit = 'wei' | 'szabo' | 'finney' | 'ether' | 'seconds' | 'minutes' | 'hours' | 'days' | 'weeks' | 'years' HexLiteral = 'hex' ('"' ([0-9a-fA-F]{2})* '"' | '\'' ([0-9a-fA-F]{2})* '\'') StringLiteral = '"' ([^"\r\n\\] | '\\' .)* '"' Identifier = [a-zA-Z_] [a-zA-Z_0-9]* +HexNumber = '0x' [0-9a-fA-F]+ +DecimalNumber = [0-9]+ + +ElementaryTypeNameExpression = ElementaryTypeName + ElementaryTypeName = 'address' | 'bool' | 'string' | 'var' | Int | Uint | Byte | Fixed | Ufixed diff --git a/docs/index.rst b/docs/index.rst index 9bee1515..cb79687b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -87,6 +87,15 @@ Solidity Tools * `evmdis <https://github.com/Arachnid/evmdis>`_ EVM Disassembler that performs static analysis on the bytecode to provide a higher level of abstraction than raw EVM operations. +Third-Party Solidity Parsers and Grammars +----------------------------------------- + +* `solidity-parser <https://github.com/ConsenSys/solidity-parser>`_ + Solidity parser for JavaScript + +* `Solidity Grammar for ANTLR 4 <https://github.com/federicobond/solidity-antlr4>`_ + Solidity grammar for the ANTLR 4 parser generator + Language Documentation ---------------------- diff --git a/docs/introduction-to-smart-contracts.rst b/docs/introduction-to-smart-contracts.rst index aee1e03b..4c134abc 100644 --- a/docs/introduction-to-smart-contracts.rst +++ b/docs/introduction-to-smart-contracts.rst @@ -283,8 +283,8 @@ determined at the time the contract is created (it is derived from the creator address and the number of transactions sent from that address, the so-called "nonce"). -Apart from the fact whether an account stores code or not, -the EVM treats the two types equally, though. +Regardless of whether or not the account stores code, the two types are +treated equally by the EVM. Every account has a persistent key-value store mapping 256-bit words to 256-bit words called **storage**. diff --git a/docs/layout-of-source-files.rst b/docs/layout-of-source-files.rst index 17ac8c6f..1e27b7c0 100644 --- a/docs/layout-of-source-files.rst +++ b/docs/layout-of-source-files.rst @@ -79,8 +79,9 @@ Paths ----- In the above, ``filename`` is always treated as a path with ``/`` as directory separator, -``.`` as the current and ``..`` as the parent directory. Path names that do not start -with ``.`` are treated as absolute paths. +``.`` as the current and ``..`` as the parent directory. When ``.`` or ``..`` is followed by a character except ``/``, +it is not considered as the current or the parent directory. +All path names are treated as absolute paths unless they start with the current ``.`` or the parent directory ``..``. To import a file ``x`` from the same directory as the current file, use ``import "./x" as x;``. If you use ``import "x" as x;`` instead, a different file could be referenced @@ -96,8 +97,8 @@ Use in Actual Compilers When the compiler is invoked, it is not only possible to specify how to discover the first element of a path, but it is possible to specify path prefix remappings so that e.g. ``github.com/ethereum/dapp-bin/library`` is remapped to -``/usr/local/dapp-bin/library`` and the compiler will read the files from there. If -remapping keys are prefixes of each other, the longest is tried first. This +``/usr/local/dapp-bin/library`` and the compiler will read the files from there. +If multiple remappings can be applied, the one with the longest key is tried first. This allows for a "fallback-remapping" with e.g. ``""`` maps to ``"/usr/local/include/solidity"``. Furthermore, these remappings can depend on the context, which allows you to configure packages to diff --git a/docs/miscellaneous.rst b/docs/miscellaneous.rst index 4a9dad87..378c3c96 100644 --- a/docs/miscellaneous.rst +++ b/docs/miscellaneous.rst @@ -570,3 +570,4 @@ Language Grammar ================ .. literalinclude:: grammar.txt + :language: none diff --git a/docs/solidity-in-depth.rst b/docs/solidity-in-depth.rst index 40704698..b6217b47 100644 --- a/docs/solidity-in-depth.rst +++ b/docs/solidity-in-depth.rst @@ -16,4 +16,5 @@ If something is missing here, please contact us on units-and-global-variables.rst control-structures.rst contracts.rst + assembly.rst miscellaneous.rst diff --git a/docs/types.rst b/docs/types.rst index 069a9190..6b67e684 100644 --- a/docs/types.rst +++ b/docs/types.rst @@ -765,7 +765,7 @@ assigning it to a local variable, as in Mappings ======== -Mapping types are declared as ``mapping _KeyType => _ValueType``. +Mapping types are declared as ``mapping(_KeyType => _ValueType)``. Here ``_KeyType`` can be almost any type except for a mapping, a dynamically sized array, a contract, an enum and a struct. ``_ValueType`` can actually be any type, including mappings. diff --git a/libdevcore/Assertions.h b/libdevcore/Assertions.h index 05e0b0e5..0fb5837c 100644 --- a/libdevcore/Assertions.h +++ b/libdevcore/Assertions.h @@ -73,27 +73,19 @@ inline bool assertEqualAux(A const& _a, B const& _b, char const* _aStr, char con /// Use it as assertThrow(1 == 1, ExceptionType, "Mathematics is wrong."); /// Do NOT supply an exception object as the second parameter. #define assertThrow(_condition, _ExceptionType, _description) \ - ::dev::assertThrowAux<_ExceptionType>(!!(_condition), _description, __LINE__, __FILE__, ETH_FUNC) + do \ + { \ + if (!(_condition)) \ + ::boost::throw_exception( \ + _ExceptionType() << \ + ::dev::errinfo_comment(_description) << \ + ::boost::throw_function(ETH_FUNC) << \ + ::boost::throw_file(__FILE__) << \ + ::boost::throw_line(__LINE__) \ + ); \ + } \ + while (false) using errinfo_comment = boost::error_info<struct tag_comment, std::string>; -template <class _ExceptionType> -inline void assertThrowAux( - bool _condition, - ::std::string const& _errorDescription, - unsigned _line, - char const* _file, - char const* _function -) -{ - if (!_condition) - ::boost::throw_exception( - _ExceptionType() << - ::dev::errinfo_comment(_errorDescription) << - ::boost::throw_function(_function) << - ::boost::throw_file(_file) << - ::boost::throw_line(_line) - ); -} - } diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index a9ca24dc..845abfd4 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -117,69 +117,36 @@ string Assembly::locationFromSources(StringMap const& _sourceCodes, SourceLocati ostream& Assembly::streamAsm(ostream& _out, string const& _prefix, StringMap const& _sourceCodes) const { - _out << _prefix << ".code:" << endl; - for (AssemblyItem const& i: m_items) + for (size_t i = 0; i < m_items.size(); ++i) { - _out << _prefix; - switch (i.type()) + AssemblyItem const& item = m_items[i]; + if (!item.location().isEmpty() && (i == 0 || m_items[i - 1].location() != item.location())) { - case Operation: - _out << " " << instructionInfo(i.instruction()).name << "\t" << i.getJumpTypeAsString(); - break; - case Push: - _out << " PUSH" << dec << max<unsigned>(1, dev::bytesRequired(i.data())) << " 0x" << hex << i.data(); - break; - case PushString: - _out << " PUSH \"" << m_strings.at((h256)i.data()) << "\""; - break; - case PushTag: - if (i.data() == 0) - _out << " PUSH [ErrorTag]"; - else - { - size_t subId = i.splitForeignPushTag().first; - if (subId == size_t(-1)) - _out << " PUSH [tag" << dec << i.splitForeignPushTag().second << "]"; - else - _out << " PUSH [tag" << dec << subId << ":" << i.splitForeignPushTag().second << "]"; - } - break; - case PushSub: - _out << " PUSH [$" << size_t(i.data()) << "]"; - break; - case PushSubSize: - _out << " PUSH #[$" << size_t(i.data()) << "]"; - break; - case PushProgramSize: - _out << " PUSHSIZE"; - break; - case PushLibraryAddress: - _out << " PUSHLIB \"" << m_libraries.at(h256(i.data())) << "\""; - break; - case Tag: - _out << "tag" << dec << i.data() << ": " << endl << _prefix << " JUMPDEST"; - break; - case PushData: - _out << " PUSH [" << hex << (unsigned)i.data() << "]"; - break; - default: - BOOST_THROW_EXCEPTION(InvalidOpcode()); + _out << _prefix << " /*"; + if (item.location().sourceName) + _out << " \"" + *item.location().sourceName + "\""; + if (!item.location().isEmpty()) + _out << ":" << to_string(item.location().start) + ":" + to_string(item.location().end); + _out << " */" << endl; } - _out << "\t\t" << locationFromSources(_sourceCodes, i.location()) << endl; + _out << _prefix << (item.type() == Tag ? "" : " ") << item.toAssemblyText() << endl; } if (!m_data.empty() || !m_subs.empty()) { - _out << _prefix << ".data:" << endl; + _out << _prefix << "stop" << endl; + Json::Value data; for (auto const& i: m_data) - if (u256(i.first) >= m_subs.size()) - _out << _prefix << " " << hex << (unsigned)(u256)i.first << ": " << dev::toHex(i.second) << endl; + assertThrow(u256(i.first) < m_subs.size(), AssemblyException, "Data not yet implemented."); + for (size_t i = 0; i < m_subs.size(); ++i) { - _out << _prefix << " " << hex << i << ": " << endl; - m_subs[i]->stream(_out, _prefix + " ", _sourceCodes); + _out << endl << _prefix << "sub_" << i << ": assembly {\n"; + m_subs[i]->streamAsm(_out, _prefix + " ", _sourceCodes); + _out << _prefix << "}" << endl; } } + return _out; } @@ -449,7 +416,7 @@ LinkerObject const& Assembly::assemble() const switch (i.type()) { case Operation: - ret.bytecode.push_back((byte)i.data()); + ret.bytecode.push_back((byte)i.instruction()); break; case PushString: { diff --git a/libevmasm/AssemblyItem.cpp b/libevmasm/AssemblyItem.cpp index 54e38de8..6c7d5425 100644 --- a/libevmasm/AssemblyItem.cpp +++ b/libevmasm/AssemblyItem.cpp @@ -20,6 +20,7 @@ */ #include "AssemblyItem.h" +#include <libevmasm/SemanticInformation.h> #include <fstream> using namespace std; @@ -28,19 +29,19 @@ using namespace dev::eth; AssemblyItem AssemblyItem::toSubAssemblyTag(size_t _subId) const { - assertThrow(m_data < (u256(1) << 64), Exception, "Tag already has subassembly set."); + assertThrow(data() < (u256(1) << 64), Exception, "Tag already has subassembly set."); assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); AssemblyItem r = *this; r.m_type = PushTag; - r.setPushTagSubIdAndTag(_subId, size_t(m_data)); + r.setPushTagSubIdAndTag(_subId, size_t(data())); return r; } pair<size_t, size_t> AssemblyItem::splitForeignPushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); - return make_pair(size_t(m_data / (u256(1) << 64)) - 1, size_t(m_data)); + return make_pair(size_t((data()) / (u256(1) << 64)) - 1, size_t(data())); } void AssemblyItem::setPushTagSubIdAndTag(size_t _subId, size_t _tag) @@ -59,7 +60,7 @@ unsigned AssemblyItem::bytesRequired(unsigned _addressLength) const case PushString: return 33; case Push: - return 1 + max<unsigned>(1, dev::bytesRequired(m_data)); + return 1 + max<unsigned>(1, dev::bytesRequired(data())); case PushSubSize: case PushProgramSize: return 4; // worst case: a 16MB program @@ -97,6 +98,28 @@ int AssemblyItem::deposit() const return 0; } +bool AssemblyItem::canBeFunctional() const +{ + switch (m_type) + { + case Operation: + return !SemanticInformation::isDupInstruction(*this) && !SemanticInformation::isSwapInstruction(*this); + case Push: + case PushString: + case PushTag: + case PushData: + case PushSub: + case PushSubSize: + case PushProgramSize: + case PushLibraryAddress: + return true; + case Tag: + return false; + default:; + } + return 0; +} + string AssemblyItem::getJumpTypeAsString() const { switch (m_jumpType) @@ -111,6 +134,65 @@ string AssemblyItem::getJumpTypeAsString() const } } +string AssemblyItem::toAssemblyText() const +{ + string text; + switch (type()) + { + case Operation: + { + assertThrow(isValidInstruction(instruction()), AssemblyException, "Invalid instruction."); + string name = instructionInfo(instruction()).name; + transform(name.begin(), name.end(), name.begin(), [](unsigned char _c) { return tolower(_c); }); + text = name; + break; + } + case Push: + text = toHex(toCompactBigEndian(data(), 1), 1, HexPrefix::Add); + break; + case PushString: + assertThrow(false, AssemblyException, "Push string assembly output not implemented."); + break; + case PushTag: + assertThrow(data() < 0x10000, AssemblyException, "Sub-assembly tags not yet implemented."); + text = string("tag_") + to_string(size_t(data())); + break; + case Tag: + assertThrow(data() < 0x10000, AssemblyException, "Sub-assembly tags not yet implemented."); + text = string("tag_") + to_string(size_t(data())) + ":"; + break; + case PushData: + assertThrow(false, AssemblyException, "Push data not implemented."); + break; + case PushSub: + text = string("dataOffset(sub_") + to_string(size_t(data())) + ")"; + break; + case PushSubSize: + text = string("dataSize(sub_") + to_string(size_t(data())) + ")"; + break; + case PushProgramSize: + text = string("bytecodeSize"); + break; + case PushLibraryAddress: + text = string("linkerSymbol(\"") + toHex(data()) + string("\")"); + break; + case UndefinedItem: + assertThrow(false, AssemblyException, "Invalid assembly item."); + break; + default: + BOOST_THROW_EXCEPTION(InvalidOpcode()); + } + if (m_jumpType == JumpType::IntoFunction || m_jumpType == JumpType::OutOfFunction) + { + text += "\t//"; + if (m_jumpType == JumpType::IntoFunction) + text += " in"; + else + text += " out"; + } + return text; +} + ostream& dev::eth::operator<<(ostream& _out, AssemblyItem const& _item) { switch (_item.type()) diff --git a/libevmasm/AssemblyItem.h b/libevmasm/AssemblyItem.h index b5bd3ed8..002b3c87 100644 --- a/libevmasm/AssemblyItem.h +++ b/libevmasm/AssemblyItem.h @@ -59,16 +59,22 @@ public: AssemblyItem(u256 _push, SourceLocation const& _location = SourceLocation()): AssemblyItem(Push, _push, _location) { } AssemblyItem(solidity::Instruction _i, SourceLocation const& _location = SourceLocation()): - AssemblyItem(Operation, byte(_i), _location) { } + m_type(Operation), + m_instruction(_i), + m_location(_location) + {} AssemblyItem(AssemblyItemType _type, u256 _data = 0, SourceLocation const& _location = SourceLocation()): m_type(_type), - m_data(_data), m_location(_location) { + if (m_type == Operation) + m_instruction = Instruction(byte(_data)); + else + m_data = std::make_shared<u256>(_data); } - AssemblyItem tag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(Tag, m_data); } - AssemblyItem pushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(PushTag, m_data); } + AssemblyItem tag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(Tag, data()); } + AssemblyItem pushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(PushTag, data()); } /// Converts the tag to a subassembly tag. This has to be called in order to move a tag across assemblies. /// @param _subId the identifier of the subassembly the tag is taken from. AssemblyItem toSubAssemblyTag(size_t _subId) const; @@ -79,25 +85,42 @@ public: void setPushTagSubIdAndTag(size_t _subId, size_t _tag); AssemblyItemType type() const { return m_type; } - u256 const& data() const { return m_data; } - void setType(AssemblyItemType const _type) { m_type = _type; } - void setData(u256 const& _data) { m_data = _data; } + u256 const& data() const { assertThrow(m_type != Operation, Exception, ""); return *m_data; } + void setData(u256 const& _data) { assertThrow(m_type != Operation, Exception, ""); m_data = std::make_shared<u256>(_data); } /// @returns the instruction of this item (only valid if type() == Operation) - Instruction instruction() const { return Instruction(byte(m_data)); } + Instruction instruction() const { assertThrow(m_type == Operation, Exception, ""); return m_instruction; } /// @returns true if the type and data of the items are equal. - bool operator==(AssemblyItem const& _other) const { return m_type == _other.m_type && m_data == _other.m_data; } + bool operator==(AssemblyItem const& _other) const + { + if (type() != _other.type()) + return false; + if (type() == Operation) + return instruction() == _other.instruction(); + else + return data() == _other.data(); + } bool operator!=(AssemblyItem const& _other) const { return !operator==(_other); } /// Less-than operator compatible with operator==. - bool operator<(AssemblyItem const& _other) const { return std::tie(m_type, m_data) < std::tie(_other.m_type, _other.m_data); } + bool operator<(AssemblyItem const& _other) const + { + if (type() != _other.type()) + return type() < _other.type(); + else if (type() == Operation) + return instruction() < _other.instruction(); + else + return data() < _other.data(); + } /// @returns an upper bound for the number of bytes required by this item, assuming that /// the value of a jump tag takes @a _addressLength bytes. unsigned bytesRequired(unsigned _addressLength) const; int deposit() const; - bool match(AssemblyItem const& _i) const { return _i.m_type == UndefinedItem || (m_type == _i.m_type && (m_type != Operation || m_data == _i.m_data)); } + /// @returns true if the assembly item can be used in a functional context. + bool canBeFunctional() const; + void setLocation(SourceLocation const& _location) { m_location = _location; } SourceLocation const& location() const { return m_location; } @@ -108,9 +131,12 @@ public: void setPushedValue(u256 const& _value) const { m_pushedValue = std::make_shared<u256>(_value); } u256 const* pushedValue() const { return m_pushedValue.get(); } + std::string toAssemblyText() const; + private: AssemblyItemType m_type; - u256 m_data; + Instruction m_instruction; ///< Only valid if m_type == Operation + std::shared_ptr<u256> m_data; ///< Only valid if m_type != Operation SourceLocation m_location; JumpType m_jumpType = JumpType::Ordinary; /// Pushed value for operations with data to be determined during assembly stage, diff --git a/libevmasm/CommonSubexpressionEliminator.cpp b/libevmasm/CommonSubexpressionEliminator.cpp index 6294e579..fd4fffa6 100644 --- a/libevmasm/CommonSubexpressionEliminator.cpp +++ b/libevmasm/CommonSubexpressionEliminator.cpp @@ -303,7 +303,9 @@ void CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) for (auto it: m_classPositions) for (auto p: it.second) if (p > m_stackHeight) + { assertThrow(false, OptimizerException, ""); + } // do some cleanup removeStackTopIfPossible(); diff --git a/libevmasm/ConstantOptimiser.cpp b/libevmasm/ConstantOptimiser.cpp index f4a50c2d..86244e17 100644 --- a/libevmasm/ConstantOptimiser.cpp +++ b/libevmasm/ConstantOptimiser.cpp @@ -38,6 +38,7 @@ unsigned ConstantOptimisationMethod::optimiseConstants( for (AssemblyItem const& item: _items) if (item.type() == Push) pushes[item]++; + map<u256, AssemblyItems> pendingReplacements; for (auto it: pushes) { AssemblyItem const& item = it.first; @@ -53,17 +54,22 @@ unsigned ConstantOptimisationMethod::optimiseConstants( bigint copyGas = copy.gasNeeded(); ComputeMethod compute(params, item.data()); bigint computeGas = compute.gasNeeded(); + AssemblyItems replacement; if (copyGas < literalGas && copyGas < computeGas) { - copy.execute(_assembly, _items); + replacement = copy.execute(_assembly); optimisations++; } - else if (computeGas < literalGas && computeGas < copyGas) + else if (computeGas < literalGas && computeGas <= copyGas) { - compute.execute(_assembly, _items); + replacement = compute.execute(_assembly); optimisations++; } + if (!replacement.empty()) + pendingReplacements[item.data()] = replacement; } + if (!pendingReplacements.empty()) + replaceConstants(_items, pendingReplacements); return optimisations; } @@ -101,18 +107,24 @@ size_t ConstantOptimisationMethod::bytesRequired(AssemblyItems const& _items) void ConstantOptimisationMethod::replaceConstants( AssemblyItems& _items, - AssemblyItems const& _replacement -) const + map<u256, AssemblyItems> const& _replacements +) { - assertThrow(_items.size() > 0, OptimizerException, ""); - for (size_t i = 0; i < _items.size(); ++i) + AssemblyItems replaced; + for (AssemblyItem const& item: _items) { - if (_items.at(i) != AssemblyItem(m_value)) - continue; - _items[i] = _replacement[0]; - _items.insert(_items.begin() + i + 1, _replacement.begin() + 1, _replacement.end()); - i += _replacement.size() - 1; + if (item.type() == Push) + { + auto it = _replacements.find(item.data()); + if (it != _replacements.end()) + { + replaced += it->second; + continue; + } + } + replaced.push_back(item); } + _items = std::move(replaced); } bigint LiteralMethod::gasNeeded() @@ -128,38 +140,44 @@ bigint LiteralMethod::gasNeeded() CodeCopyMethod::CodeCopyMethod(Params const& _params, u256 const& _value): ConstantOptimisationMethod(_params, _value) { - m_copyRoutine = AssemblyItems{ - u256(0), - Instruction::DUP1, - Instruction::MLOAD, // back up memory - u256(32), - AssemblyItem(PushData, u256(1) << 16), // has to be replaced - Instruction::DUP4, - Instruction::CODECOPY, - Instruction::DUP2, - Instruction::MLOAD, - Instruction::SWAP2, - Instruction::MSTORE - }; } bigint CodeCopyMethod::gasNeeded() { return combineGas( // Run gas: we ignore memory increase costs - simpleRunGas(m_copyRoutine) + GasCosts::copyGas, + simpleRunGas(copyRoutine()) + GasCosts::copyGas, // Data gas for copy routines: Some bytes are zero, but we ignore them. - bytesRequired(m_copyRoutine) * (m_params.isCreation ? GasCosts::txDataNonZeroGas : GasCosts::createDataGas), + bytesRequired(copyRoutine()) * (m_params.isCreation ? GasCosts::txDataNonZeroGas : GasCosts::createDataGas), // Data gas for data itself dataGas(toBigEndian(m_value)) ); } -void CodeCopyMethod::execute(Assembly& _assembly, AssemblyItems& _items) +AssemblyItems CodeCopyMethod::execute(Assembly& _assembly) { bytes data = toBigEndian(m_value); - m_copyRoutine[4] = _assembly.newData(data); - replaceConstants(_items, m_copyRoutine); + AssemblyItems actualCopyRoutine = copyRoutine(); + actualCopyRoutine[4] = _assembly.newData(data); + return actualCopyRoutine; +} + +AssemblyItems const& CodeCopyMethod::copyRoutine() const +{ + AssemblyItems static copyRoutine{ + u256(0), + Instruction::DUP1, + Instruction::MLOAD, // back up memory + u256(32), + AssemblyItem(PushData, u256(1) << 16), // has to be replaced + Instruction::DUP4, + Instruction::CODECOPY, + Instruction::DUP2, + Instruction::MLOAD, + Instruction::SWAP2, + Instruction::MSTORE + }; + return copyRoutine; } AssemblyItems ComputeMethod::findRepresentation(u256 const& _value) diff --git a/libevmasm/ConstantOptimiser.h b/libevmasm/ConstantOptimiser.h index b35b2a69..dfa2fbf8 100644 --- a/libevmasm/ConstantOptimiser.h +++ b/libevmasm/ConstantOptimiser.h @@ -60,7 +60,10 @@ public: explicit ConstantOptimisationMethod(Params const& _params, u256 const& _value): m_params(_params), m_value(_value) {} virtual bigint gasNeeded() = 0; - virtual void execute(Assembly& _assembly, AssemblyItems& _items) = 0; + /// Executes the method, potentially appending to the assembly and returns a vector of + /// assembly items the constant should be relpaced with in one sweep. + /// If the vector is empty, the constants will not be deleted. + virtual AssemblyItems execute(Assembly& _assembly) = 0; protected: size_t dataSize() const { return std::max<size_t>(1, dev::bytesRequired(m_value)); } @@ -83,8 +86,8 @@ protected: return m_params.runs * _runGas + m_params.multiplicity * _repeatedDataGas + _uniqueDataGas; } - /// Replaces the constant by the code given in @a _replacement. - void replaceConstants(AssemblyItems& _items, AssemblyItems const& _replacement) const; + /// Replaces all constants i by the code given in @a _replacement[i]. + static void replaceConstants(AssemblyItems& _items, std::map<u256, AssemblyItems> const& _replacement); Params m_params; u256 const& m_value; @@ -100,7 +103,7 @@ public: explicit LiteralMethod(Params const& _params, u256 const& _value): ConstantOptimisationMethod(_params, _value) {} virtual bigint gasNeeded() override; - virtual void execute(Assembly&, AssemblyItems&) override {} + virtual AssemblyItems execute(Assembly&) override { return AssemblyItems{}; } }; /** @@ -111,10 +114,10 @@ class CodeCopyMethod: public ConstantOptimisationMethod public: explicit CodeCopyMethod(Params const& _params, u256 const& _value); virtual bigint gasNeeded() override; - virtual void execute(Assembly& _assembly, AssemblyItems& _items) override; + virtual AssemblyItems execute(Assembly& _assembly) override; protected: - AssemblyItems m_copyRoutine; + AssemblyItems const& copyRoutine() const; }; /** @@ -130,9 +133,9 @@ public: } virtual bigint gasNeeded() override { return gasNeeded(m_routine); } - virtual void execute(Assembly&, AssemblyItems& _items) override + virtual AssemblyItems execute(Assembly&) override { - replaceConstants(_items, m_routine); + return m_routine; } protected: diff --git a/libevmasm/ExpressionClasses.cpp b/libevmasm/ExpressionClasses.cpp index d5ccd7e3..fc283b0b 100644 --- a/libevmasm/ExpressionClasses.cpp +++ b/libevmasm/ExpressionClasses.cpp @@ -29,6 +29,7 @@ #include <boost/noncopyable.hpp> #include <libevmasm/Assembly.h> #include <libevmasm/CommonSubexpressionEliminator.h> +#include <libevmasm/SimplificationRules.h> using namespace std; using namespace dev; @@ -40,8 +41,18 @@ bool ExpressionClasses::Expression::operator<(ExpressionClasses::Expression cons assertThrow(!!item && !!_other.item, OptimizerException, ""); auto type = item->type(); auto otherType = _other.item->type(); - return std::tie(type, item->data(), arguments, sequenceNumber) < - std::tie(otherType, _other.item->data(), _other.arguments, _other.sequenceNumber); + if (type != otherType) + return type < otherType; + else if (type == Operation) + { + auto instr = item->instruction(); + auto otherInstr = _other.item->instruction(); + return std::tie(instr, arguments, sequenceNumber) < + std::tie(otherInstr, _other.arguments, _other.sequenceNumber); + } + else + return std::tie(item->data(), arguments, sequenceNumber) < + std::tie(_other.item->data(), _other.arguments, _other.sequenceNumber); } ExpressionClasses::Id ExpressionClasses::find( @@ -170,191 +181,6 @@ string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) const return str.str(); } -class Rules: public boost::noncopyable -{ -public: - Rules(); - void resetMatchGroups() { m_matchGroups.clear(); } - vector<pair<Pattern, function<Pattern()>>> rules() const { return m_rules; } - -private: - using Expression = ExpressionClasses::Expression; - map<unsigned, Expression const*> m_matchGroups; - vector<pair<Pattern, function<Pattern()>>> m_rules; -}; - -template <class S> S divWorkaround(S const& _a, S const& _b) -{ - return (S)(bigint(_a) / bigint(_b)); -} - -template <class S> S modWorkaround(S const& _a, S const& _b) -{ - return (S)(bigint(_a) % bigint(_b)); -} - -Rules::Rules() -{ - // Multiple occurences of one of these inside one rule must match the same equivalence class. - // Constants. - Pattern A(Push); - Pattern B(Push); - Pattern C(Push); - // Anything. - Pattern X; - Pattern Y; - Pattern Z; - A.setMatchGroup(1, m_matchGroups); - B.setMatchGroup(2, m_matchGroups); - C.setMatchGroup(3, m_matchGroups); - X.setMatchGroup(4, m_matchGroups); - Y.setMatchGroup(5, m_matchGroups); - Z.setMatchGroup(6, m_matchGroups); - - m_rules = vector<pair<Pattern, function<Pattern()>>>{ - // arithmetics on constants - {{Instruction::ADD, {A, B}}, [=]{ return A.d() + B.d(); }}, - {{Instruction::MUL, {A, B}}, [=]{ return A.d() * B.d(); }}, - {{Instruction::SUB, {A, B}}, [=]{ return A.d() - B.d(); }}, - {{Instruction::DIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : divWorkaround(A.d(), B.d()); }}, - {{Instruction::SDIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(divWorkaround(u2s(A.d()), u2s(B.d()))); }}, - {{Instruction::MOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : modWorkaround(A.d(), B.d()); }}, - {{Instruction::SMOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(modWorkaround(u2s(A.d()), u2s(B.d()))); }}, - {{Instruction::EXP, {A, B}}, [=]{ return u256(boost::multiprecision::powm(bigint(A.d()), bigint(B.d()), bigint(1) << 256)); }}, - {{Instruction::NOT, {A}}, [=]{ return ~A.d(); }}, - {{Instruction::LT, {A, B}}, [=]() { return A.d() < B.d() ? u256(1) : 0; }}, - {{Instruction::GT, {A, B}}, [=]() -> u256 { return A.d() > B.d() ? 1 : 0; }}, - {{Instruction::SLT, {A, B}}, [=]() -> u256 { return u2s(A.d()) < u2s(B.d()) ? 1 : 0; }}, - {{Instruction::SGT, {A, B}}, [=]() -> u256 { return u2s(A.d()) > u2s(B.d()) ? 1 : 0; }}, - {{Instruction::EQ, {A, B}}, [=]() -> u256 { return A.d() == B.d() ? 1 : 0; }}, - {{Instruction::ISZERO, {A}}, [=]() -> u256 { return A.d() == 0 ? 1 : 0; }}, - {{Instruction::AND, {A, B}}, [=]{ return A.d() & B.d(); }}, - {{Instruction::OR, {A, B}}, [=]{ return A.d() | B.d(); }}, - {{Instruction::XOR, {A, B}}, [=]{ return A.d() ^ B.d(); }}, - {{Instruction::BYTE, {A, B}}, [=]{ return A.d() >= 32 ? 0 : (B.d() >> unsigned(8 * (31 - A.d()))) & 0xff; }}, - {{Instruction::ADDMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) + bigint(B.d())) % C.d()); }}, - {{Instruction::MULMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) * bigint(B.d())) % C.d()); }}, - {{Instruction::MULMOD, {A, B, C}}, [=]{ return A.d() * B.d(); }}, - {{Instruction::SIGNEXTEND, {A, B}}, [=]() -> u256 { - if (A.d() >= 31) - return B.d(); - unsigned testBit = unsigned(A.d()) * 8 + 7; - u256 mask = (u256(1) << testBit) - 1; - return u256(boost::multiprecision::bit_test(B.d(), testBit) ? B.d() | ~mask : B.d() & mask); - }}, - - // invariants involving known constants - {{Instruction::ADD, {X, 0}}, [=]{ return X; }}, - {{Instruction::SUB, {X, 0}}, [=]{ return X; }}, - {{Instruction::MUL, {X, 1}}, [=]{ return X; }}, - {{Instruction::DIV, {X, 1}}, [=]{ return X; }}, - {{Instruction::SDIV, {X, 1}}, [=]{ return X; }}, - {{Instruction::OR, {X, 0}}, [=]{ return X; }}, - {{Instruction::XOR, {X, 0}}, [=]{ return X; }}, - {{Instruction::AND, {X, ~u256(0)}}, [=]{ return X; }}, - {{Instruction::AND, {X, 0}}, [=]{ return u256(0); }}, - {{Instruction::MUL, {X, 0}}, [=]{ return u256(0); }}, - {{Instruction::DIV, {X, 0}}, [=]{ return u256(0); }}, - {{Instruction::DIV, {0, X}}, [=]{ return u256(0); }}, - {{Instruction::MOD, {X, 0}}, [=]{ return u256(0); }}, - {{Instruction::MOD, {0, X}}, [=]{ return u256(0); }}, - {{Instruction::OR, {X, ~u256(0)}}, [=]{ return ~u256(0); }}, - {{Instruction::EQ, {X, 0}}, [=]() -> Pattern { return {Instruction::ISZERO, {X}}; } }, - // operations involving an expression and itself - {{Instruction::AND, {X, X}}, [=]{ return X; }}, - {{Instruction::OR, {X, X}}, [=]{ return X; }}, - {{Instruction::XOR, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::SUB, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::EQ, {X, X}}, [=]{ return u256(1); }}, - {{Instruction::LT, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::SLT, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::GT, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::SGT, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::MOD, {X, X}}, [=]{ return u256(0); }}, - - {{Instruction::NOT, {{Instruction::NOT, {X}}}}, [=]{ return X; }}, - {{Instruction::XOR, {{{X}, {Instruction::XOR, {X, Y}}}}}, [=]{ return Y; }}, - {{Instruction::OR, {{{X}, {Instruction::AND, {X, Y}}}}}, [=]{ return X; }}, - {{Instruction::AND, {{{X}, {Instruction::OR, {X, Y}}}}}, [=]{ return X; }}, - {{Instruction::AND, {{{X}, {Instruction::NOT, {X}}}}}, [=]{ return u256(0); }}, - {{Instruction::OR, {{{X}, {Instruction::NOT, {X}}}}}, [=]{ return ~u256(0); }}, - }; - // Double negation of opcodes with binary result - for (auto const& op: vector<Instruction>{ - Instruction::EQ, - Instruction::LT, - Instruction::SLT, - Instruction::GT, - Instruction::SGT - }) - m_rules.push_back({ - {Instruction::ISZERO, {{Instruction::ISZERO, {{op, {X, Y}}}}}}, - [=]() -> Pattern { return {op, {X, Y}}; } - }); - m_rules.push_back({ - {Instruction::ISZERO, {{Instruction::ISZERO, {{Instruction::ISZERO, {X}}}}}}, - [=]() -> Pattern { return {Instruction::ISZERO, {X}}; } - }); - m_rules.push_back({ - {Instruction::ISZERO, {{Instruction::XOR, {X, Y}}}}, - [=]() -> Pattern { return { Instruction::EQ, {X, Y} }; } - }); - // Associative operations - for (auto const& opFun: vector<pair<Instruction,function<u256(u256 const&,u256 const&)>>>{ - {Instruction::ADD, plus<u256>()}, - {Instruction::MUL, multiplies<u256>()}, - {Instruction::AND, bit_and<u256>()}, - {Instruction::OR, bit_or<u256>()}, - {Instruction::XOR, bit_xor<u256>()} - }) - { - auto op = opFun.first; - auto fun = opFun.second; - // Moving constants to the outside, order matters here! - // we need actions that return expressions (or patterns?) here, and we need also reversed rules - // (X+A)+B -> X+(A+B) - m_rules += vector<pair<Pattern, function<Pattern()>>>{{ - {op, {{op, {X, A}}, B}}, - [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } - }, { - // X+(Y+A) -> (X+Y)+A - {op, {{op, {X, A}}, Y}}, - [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } - }, { - // For now, we still need explicit commutativity for the inner pattern - {op, {{op, {A, X}}, B}}, - [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } - }, { - {op, {{op, {A, X}}, Y}}, - [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } - }}; - } - // move constants across subtractions - m_rules += vector<pair<Pattern, function<Pattern()>>>{ - { - // X - A -> X + (-A) - {Instruction::SUB, {X, A}}, - [=]() -> Pattern { return {Instruction::ADD, {X, 0 - A.d()}}; } - }, { - // (X + A) - Y -> (X - Y) + A - {Instruction::SUB, {{Instruction::ADD, {X, A}}, Y}}, - [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } - }, { - // (A + X) - Y -> (X - Y) + A - {Instruction::SUB, {{Instruction::ADD, {A, X}}, Y}}, - [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } - }, { - // X - (Y + A) -> (X - Y) + (-A) - {Instruction::SUB, {X, {Instruction::ADD, {Y, A}}}}, - [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } - }, { - // X - (A + Y) -> (X - Y) + (-A) - {Instruction::SUB, {X, {Instruction::ADD, {A, Y}}}}, - [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } - } - }; -} - ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, bool _secondRun) { static Rules rules; @@ -366,21 +192,17 @@ ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, ) return -1; - for (auto const& rule: rules.rules()) + if (auto match = rules.findFirstMatch(_expr, *this)) { - rules.resetMatchGroups(); - if (rule.first.matches(_expr, *this)) - { - // Debug info - //cout << "Simplifying " << *_expr.item << "("; - //for (Id arg: _expr.arguments) - // cout << fullDAGToString(arg) << ", "; - //cout << ")" << endl; - //cout << "with rule " << rule.first.toString() << endl; - //ExpressionTemplate t(rule.second()); - //cout << "to " << rule.second().toString() << endl; - return rebuildExpression(ExpressionTemplate(rule.second(), _expr.item->location())); - } + // Debug info + //cout << "Simplifying " << *_expr.item << "("; + //for (Id arg: _expr.arguments) + // cout << fullDAGToString(arg) << ", "; + //cout << ")" << endl; + //cout << "with rule " << match->first.toString() << endl; + //ExpressionTemplate t(match->second()); + //cout << "to " << match->second().toString() << endl; + return rebuildExpression(ExpressionTemplate(match->second(), _expr.item->location())); } if (!_secondRun && _expr.arguments.size() == 2 && SemanticInformation::isCommutativeOperation(*_expr.item)) @@ -403,122 +225,3 @@ ExpressionClasses::Id ExpressionClasses::rebuildExpression(ExpressionTemplate co arguments.push_back(rebuildExpression(t)); return find(_template.item, arguments); } - - -Pattern::Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments): - m_type(Operation), - m_requireDataMatch(true), - m_data(_instruction), - m_arguments(_arguments) -{ -} - -void Pattern::setMatchGroup(unsigned _group, map<unsigned, Expression const*>& _matchGroups) -{ - m_matchGroup = _group; - m_matchGroups = &_matchGroups; -} - -bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const -{ - if (!matchesBaseItem(_expr.item)) - return false; - if (m_matchGroup) - { - if (!m_matchGroups->count(m_matchGroup)) - (*m_matchGroups)[m_matchGroup] = &_expr; - else if ((*m_matchGroups)[m_matchGroup]->id != _expr.id) - return false; - } - assertThrow(m_arguments.size() == 0 || _expr.arguments.size() == m_arguments.size(), OptimizerException, ""); - for (size_t i = 0; i < m_arguments.size(); ++i) - if (!m_arguments[i].matches(_classes.representative(_expr.arguments[i]), _classes)) - return false; - return true; -} - -AssemblyItem Pattern::toAssemblyItem(SourceLocation const& _location) const -{ - return AssemblyItem(m_type, m_data, _location); -} - -string Pattern::toString() const -{ - stringstream s; - switch (m_type) - { - case Operation: - s << instructionInfo(Instruction(unsigned(m_data))).name; - break; - case Push: - s << "PUSH " << hex << m_data; - break; - case UndefinedItem: - s << "ANY"; - break; - default: - s << "t=" << dec << m_type << " d=" << hex << m_data; - break; - } - if (!m_requireDataMatch) - s << " ~"; - if (m_matchGroup) - s << "[" << dec << m_matchGroup << "]"; - s << "("; - for (Pattern const& p: m_arguments) - s << p.toString() << ", "; - s << ")"; - return s.str(); -} - -bool Pattern::matchesBaseItem(AssemblyItem const* _item) const -{ - if (m_type == UndefinedItem) - return true; - if (!_item) - return false; - if (m_type != _item->type()) - return false; - if (m_requireDataMatch && m_data != _item->data()) - return false; - return true; -} - -Pattern::Expression const& Pattern::matchGroupValue() const -{ - assertThrow(m_matchGroup > 0, OptimizerException, ""); - assertThrow(!!m_matchGroups, OptimizerException, ""); - assertThrow((*m_matchGroups)[m_matchGroup], OptimizerException, ""); - return *(*m_matchGroups)[m_matchGroup]; -} - - -ExpressionTemplate::ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location) -{ - if (_pattern.matchGroup()) - { - hasId = true; - id = _pattern.id(); - } - else - { - hasId = false; - item = _pattern.toAssemblyItem(_location); - } - for (auto const& arg: _pattern.arguments()) - arguments.push_back(ExpressionTemplate(arg, _location)); -} - -string ExpressionTemplate::toString() const -{ - stringstream s; - if (hasId) - s << id; - else - s << item; - s << "("; - for (auto const& arg: arguments) - s << arg.toString(); - s << ")"; - return s.str(); -} diff --git a/libevmasm/ExpressionClasses.h b/libevmasm/ExpressionClasses.h index 11a698dd..5d53b292 100644 --- a/libevmasm/ExpressionClasses.h +++ b/libevmasm/ExpressionClasses.h @@ -121,70 +121,5 @@ private: std::vector<std::shared_ptr<AssemblyItem>> m_spareAssemblyItems; }; -/** - * Pattern to match against an expression. - * Also stores matched expressions to retrieve them later, for constructing new expressions using - * ExpressionTemplate. - */ -class Pattern -{ -public: - using Expression = ExpressionClasses::Expression; - using Id = ExpressionClasses::Id; - - // Matches a specific constant value. - Pattern(unsigned _value): Pattern(u256(_value)) {} - // Matches a specific constant value. - Pattern(u256 const& _value): m_type(Push), m_requireDataMatch(true), m_data(_value) {} - // Matches a specific assembly item type or anything if not given. - Pattern(AssemblyItemType _type = UndefinedItem): m_type(_type) {} - // Matches a given instruction with given arguments - Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments = {}); - /// Sets this pattern to be part of the match group with the identifier @a _group. - /// Inside one rule, all patterns in the same match group have to match expressions from the - /// same expression equivalence class. - void setMatchGroup(unsigned _group, std::map<unsigned, Expression const*>& _matchGroups); - unsigned matchGroup() const { return m_matchGroup; } - bool matches(Expression const& _expr, ExpressionClasses const& _classes) const; - - AssemblyItem toAssemblyItem(SourceLocation const& _location) const; - std::vector<Pattern> arguments() const { return m_arguments; } - - /// @returns the id of the matched expression if this pattern is part of a match group. - Id id() const { return matchGroupValue().id; } - /// @returns the data of the matched expression if this pattern is part of a match group. - u256 const& d() const { return matchGroupValue().item->data(); } - - std::string toString() const; - -private: - bool matchesBaseItem(AssemblyItem const* _item) const; - Expression const& matchGroupValue() const; - - AssemblyItemType m_type; - bool m_requireDataMatch = false; - u256 m_data = 0; - std::vector<Pattern> m_arguments; - unsigned m_matchGroup = 0; - std::map<unsigned, Expression const*>* m_matchGroups = nullptr; -}; - -/** - * Template for a new expression that can be built from matched patterns. - */ -struct ExpressionTemplate -{ - using Expression = ExpressionClasses::Expression; - using Id = ExpressionClasses::Id; - explicit ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location); - std::string toString() const; - bool hasId = false; - /// Id of the matched expression, if available. - Id id = Id(-1); - // Otherwise, assembly item. - AssemblyItem item = UndefinedItem; - std::vector<ExpressionTemplate> arguments; -}; - } } diff --git a/libevmasm/PeepholeOptimiser.cpp b/libevmasm/PeepholeOptimiser.cpp index b96b0295..923ffa67 100644 --- a/libevmasm/PeepholeOptimiser.cpp +++ b/libevmasm/PeepholeOptimiser.cpp @@ -120,7 +120,7 @@ struct OpPop: SimplePeepholeOptimizerMethod<OpPop, 2> if (instructionInfo(instr).ret == 1 && !instructionInfo(instr).sideEffects) { for (int j = 0; j < instructionInfo(instr).args; j++) - *_out = Instruction::POP; + *_out = {Instruction::POP, _op.location()}; return true; } } diff --git a/libevmasm/SimplificationRules.cpp b/libevmasm/SimplificationRules.cpp new file mode 100644 index 00000000..2976d95f --- /dev/null +++ b/libevmasm/SimplificationRules.cpp @@ -0,0 +1,370 @@ +/* + This file is part of solidity. + + solidity is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + solidity is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with solidity. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file ExpressionClasses.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + * Container for equivalence classes of expressions for use in common subexpression elimination. + */ + +#include <libevmasm/ExpressionClasses.h> +#include <utility> +#include <tuple> +#include <functional> +#include <boost/range/adaptor/reversed.hpp> +#include <boost/noncopyable.hpp> +#include <libevmasm/Assembly.h> +#include <libevmasm/CommonSubexpressionEliminator.h> +#include <libevmasm/SimplificationRules.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + + +pair<Pattern, function<Pattern()> > const* Rules::findFirstMatch( + Expression const& _expr, + ExpressionClasses const& _classes +) +{ + resetMatchGroups(); + + assertThrow(_expr.item, OptimizerException, ""); + for (auto const& rule: m_rules[byte(_expr.item->instruction())]) + { + if (rule.first.matches(_expr, _classes)) + return &rule; + resetMatchGroups(); + } + return nullptr; +} + +void Rules::addRules(std::vector<std::pair<Pattern, std::function<Pattern ()> > > const& _rules) +{ + for (auto const& r: _rules) + addRule(r); +} + +void Rules::addRule(std::pair<Pattern, std::function<Pattern()> > const& _rule) +{ + m_rules[byte(_rule.first.instruction())].push_back(_rule); +} + +template <class S> S divWorkaround(S const& _a, S const& _b) +{ + return (S)(bigint(_a) / bigint(_b)); +} + +template <class S> S modWorkaround(S const& _a, S const& _b) +{ + return (S)(bigint(_a) % bigint(_b)); +} + +Rules::Rules() +{ + // Multiple occurences of one of these inside one rule must match the same equivalence class. + // Constants. + Pattern A(Push); + Pattern B(Push); + Pattern C(Push); + // Anything. + Pattern X; + Pattern Y; + Pattern Z; + A.setMatchGroup(1, m_matchGroups); + B.setMatchGroup(2, m_matchGroups); + C.setMatchGroup(3, m_matchGroups); + X.setMatchGroup(4, m_matchGroups); + Y.setMatchGroup(5, m_matchGroups); + Z.setMatchGroup(6, m_matchGroups); + + addRules(vector<pair<Pattern, function<Pattern()>>>{ + // arithmetics on constants + {{Instruction::ADD, {A, B}}, [=]{ return A.d() + B.d(); }}, + {{Instruction::MUL, {A, B}}, [=]{ return A.d() * B.d(); }}, + {{Instruction::SUB, {A, B}}, [=]{ return A.d() - B.d(); }}, + {{Instruction::DIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : divWorkaround(A.d(), B.d()); }}, + {{Instruction::SDIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(divWorkaround(u2s(A.d()), u2s(B.d()))); }}, + {{Instruction::MOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : modWorkaround(A.d(), B.d()); }}, + {{Instruction::SMOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(modWorkaround(u2s(A.d()), u2s(B.d()))); }}, + {{Instruction::EXP, {A, B}}, [=]{ return u256(boost::multiprecision::powm(bigint(A.d()), bigint(B.d()), bigint(1) << 256)); }}, + {{Instruction::NOT, {A}}, [=]{ return ~A.d(); }}, + {{Instruction::LT, {A, B}}, [=]() { return A.d() < B.d() ? u256(1) : 0; }}, + {{Instruction::GT, {A, B}}, [=]() -> u256 { return A.d() > B.d() ? 1 : 0; }}, + {{Instruction::SLT, {A, B}}, [=]() -> u256 { return u2s(A.d()) < u2s(B.d()) ? 1 : 0; }}, + {{Instruction::SGT, {A, B}}, [=]() -> u256 { return u2s(A.d()) > u2s(B.d()) ? 1 : 0; }}, + {{Instruction::EQ, {A, B}}, [=]() -> u256 { return A.d() == B.d() ? 1 : 0; }}, + {{Instruction::ISZERO, {A}}, [=]() -> u256 { return A.d() == 0 ? 1 : 0; }}, + {{Instruction::AND, {A, B}}, [=]{ return A.d() & B.d(); }}, + {{Instruction::OR, {A, B}}, [=]{ return A.d() | B.d(); }}, + {{Instruction::XOR, {A, B}}, [=]{ return A.d() ^ B.d(); }}, + {{Instruction::BYTE, {A, B}}, [=]{ return A.d() >= 32 ? 0 : (B.d() >> unsigned(8 * (31 - A.d()))) & 0xff; }}, + {{Instruction::ADDMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) + bigint(B.d())) % C.d()); }}, + {{Instruction::MULMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) * bigint(B.d())) % C.d()); }}, + {{Instruction::MULMOD, {A, B, C}}, [=]{ return A.d() * B.d(); }}, + {{Instruction::SIGNEXTEND, {A, B}}, [=]() -> u256 { + if (A.d() >= 31) + return B.d(); + unsigned testBit = unsigned(A.d()) * 8 + 7; + u256 mask = (u256(1) << testBit) - 1; + return u256(boost::multiprecision::bit_test(B.d(), testBit) ? B.d() | ~mask : B.d() & mask); + }}, + + // invariants involving known constants + {{Instruction::ADD, {X, 0}}, [=]{ return X; }}, + {{Instruction::SUB, {X, 0}}, [=]{ return X; }}, + {{Instruction::MUL, {X, 1}}, [=]{ return X; }}, + {{Instruction::DIV, {X, 1}}, [=]{ return X; }}, + {{Instruction::SDIV, {X, 1}}, [=]{ return X; }}, + {{Instruction::OR, {X, 0}}, [=]{ return X; }}, + {{Instruction::XOR, {X, 0}}, [=]{ return X; }}, + {{Instruction::AND, {X, ~u256(0)}}, [=]{ return X; }}, + {{Instruction::AND, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::MUL, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::DIV, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::DIV, {0, X}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {0, X}}, [=]{ return u256(0); }}, + {{Instruction::OR, {X, ~u256(0)}}, [=]{ return ~u256(0); }}, + {{Instruction::EQ, {X, 0}}, [=]() -> Pattern { return {Instruction::ISZERO, {X}}; } }, + // operations involving an expression and itself + {{Instruction::AND, {X, X}}, [=]{ return X; }}, + {{Instruction::OR, {X, X}}, [=]{ return X; }}, + {{Instruction::XOR, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SUB, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::EQ, {X, X}}, [=]{ return u256(1); }}, + {{Instruction::LT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SLT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::GT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SGT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {X, X}}, [=]{ return u256(0); }}, + + {{Instruction::NOT, {{Instruction::NOT, {X}}}}, [=]{ return X; }}, + {{Instruction::XOR, {{{X}, {Instruction::XOR, {X, Y}}}}}, [=]{ return Y; }}, + {{Instruction::OR, {{{X}, {Instruction::AND, {X, Y}}}}}, [=]{ return X; }}, + {{Instruction::AND, {{{X}, {Instruction::OR, {X, Y}}}}}, [=]{ return X; }}, + {{Instruction::AND, {{{X}, {Instruction::NOT, {X}}}}}, [=]{ return u256(0); }}, + {{Instruction::OR, {{{X}, {Instruction::NOT, {X}}}}}, [=]{ return ~u256(0); }}, + }); + // Double negation of opcodes with binary result + for (auto const& op: vector<Instruction>{ + Instruction::EQ, + Instruction::LT, + Instruction::SLT, + Instruction::GT, + Instruction::SGT + }) + addRule({ + {Instruction::ISZERO, {{Instruction::ISZERO, {{op, {X, Y}}}}}}, + [=]() -> Pattern { return {op, {X, Y}}; } + }); + addRule({ + {Instruction::ISZERO, {{Instruction::ISZERO, {{Instruction::ISZERO, {X}}}}}}, + [=]() -> Pattern { return {Instruction::ISZERO, {X}}; } + }); + addRule({ + {Instruction::ISZERO, {{Instruction::XOR, {X, Y}}}}, + [=]() -> Pattern { return { Instruction::EQ, {X, Y} }; } + }); + // Associative operations + for (auto const& opFun: vector<pair<Instruction,function<u256(u256 const&,u256 const&)>>>{ + {Instruction::ADD, plus<u256>()}, + {Instruction::MUL, multiplies<u256>()}, + {Instruction::AND, bit_and<u256>()}, + {Instruction::OR, bit_or<u256>()}, + {Instruction::XOR, bit_xor<u256>()} + }) + { + auto op = opFun.first; + auto fun = opFun.second; + // Moving constants to the outside, order matters here! + // we need actions that return expressions (or patterns?) here, and we need also reversed rules + // (X+A)+B -> X+(A+B) + addRules(vector<pair<Pattern, function<Pattern()>>>{{ + {op, {{op, {X, A}}, B}}, + [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } + }, { + // X+(Y+A) -> (X+Y)+A + {op, {{op, {X, A}}, Y}}, + [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } + }, { + // For now, we still need explicit commutativity for the inner pattern + {op, {{op, {A, X}}, B}}, + [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } + }, { + {op, {{op, {A, X}}, Y}}, + [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } + }}); + } + // move constants across subtractions + addRules(vector<pair<Pattern, function<Pattern()>>>{ + { + // X - A -> X + (-A) + {Instruction::SUB, {X, A}}, + [=]() -> Pattern { return {Instruction::ADD, {X, 0 - A.d()}}; } + }, { + // (X + A) - Y -> (X - Y) + A + {Instruction::SUB, {{Instruction::ADD, {X, A}}, Y}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } + }, { + // (A + X) - Y -> (X - Y) + A + {Instruction::SUB, {{Instruction::ADD, {A, X}}, Y}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } + }, { + // X - (Y + A) -> (X - Y) + (-A) + {Instruction::SUB, {X, {Instruction::ADD, {Y, A}}}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } + }, { + // X - (A + Y) -> (X - Y) + (-A) + {Instruction::SUB, {X, {Instruction::ADD, {A, Y}}}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } + } + }); +} + +Pattern::Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments): + m_type(Operation), + m_instruction(_instruction), + m_arguments(_arguments) +{ +} + +void Pattern::setMatchGroup(unsigned _group, map<unsigned, Expression const*>& _matchGroups) +{ + m_matchGroup = _group; + m_matchGroups = &_matchGroups; +} + +bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const +{ + if (!matchesBaseItem(_expr.item)) + return false; + if (m_matchGroup) + { + if (!m_matchGroups->count(m_matchGroup)) + (*m_matchGroups)[m_matchGroup] = &_expr; + else if ((*m_matchGroups)[m_matchGroup]->id != _expr.id) + return false; + } + assertThrow(m_arguments.size() == 0 || _expr.arguments.size() == m_arguments.size(), OptimizerException, ""); + for (size_t i = 0; i < m_arguments.size(); ++i) + if (!m_arguments[i].matches(_classes.representative(_expr.arguments[i]), _classes)) + return false; + return true; +} + +AssemblyItem Pattern::toAssemblyItem(SourceLocation const& _location) const +{ + if (m_type == Operation) + return AssemblyItem(m_instruction, _location); + else + return AssemblyItem(m_type, data(), _location); +} + +string Pattern::toString() const +{ + stringstream s; + switch (m_type) + { + case Operation: + s << instructionInfo(m_instruction).name; + break; + case Push: + if (m_data) + s << "PUSH " << hex << data(); + else + s << "PUSH "; + break; + case UndefinedItem: + s << "ANY"; + break; + default: + if (m_data) + s << "t=" << dec << m_type << " d=" << hex << data(); + else + s << "t=" << dec << m_type << " d: nullptr"; + break; + } + if (!m_requireDataMatch) + s << " ~"; + if (m_matchGroup) + s << "[" << dec << m_matchGroup << "]"; + s << "("; + for (Pattern const& p: m_arguments) + s << p.toString() << ", "; + s << ")"; + return s.str(); +} + +bool Pattern::matchesBaseItem(AssemblyItem const* _item) const +{ + if (m_type == UndefinedItem) + return true; + if (!_item) + return false; + if (m_type != _item->type()) + return false; + else if (m_type == Operation) + return m_instruction == _item->instruction(); + else if (m_requireDataMatch) + return data() == _item->data(); + return true; +} + +Pattern::Expression const& Pattern::matchGroupValue() const +{ + assertThrow(m_matchGroup > 0, OptimizerException, ""); + assertThrow(!!m_matchGroups, OptimizerException, ""); + assertThrow((*m_matchGroups)[m_matchGroup], OptimizerException, ""); + return *(*m_matchGroups)[m_matchGroup]; +} + +u256 const& Pattern::data() const +{ + assertThrow(m_data, OptimizerException, ""); + return *m_data; +} + +ExpressionTemplate::ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location) +{ + if (_pattern.matchGroup()) + { + hasId = true; + id = _pattern.id(); + } + else + { + hasId = false; + item = _pattern.toAssemblyItem(_location); + } + for (auto const& arg: _pattern.arguments()) + arguments.push_back(ExpressionTemplate(arg, _location)); +} + +string ExpressionTemplate::toString() const +{ + stringstream s; + if (hasId) + s << id; + else + s << item; + s << "("; + for (auto const& arg: arguments) + s << arg.toString(); + s << ")"; + return s.str(); +} diff --git a/libevmasm/SimplificationRules.h b/libevmasm/SimplificationRules.h new file mode 100644 index 00000000..a4da5849 --- /dev/null +++ b/libevmasm/SimplificationRules.h @@ -0,0 +1,140 @@ +/* + This file is part of solidity. + + solidity is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + solidity is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with solidity. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file SimplificationRules + * @author Christian <chris@ethereum.org> + * @date 2017 + * Module for applying replacement rules against Expressions. + */ + +#pragma once + +#include <libevmasm/ExpressionClasses.h> + +#include <functional> +#include <vector> + +namespace dev +{ +namespace eth +{ + +class Pattern; + +/** + * Container for all simplification rules. + */ +class Rules: public boost::noncopyable +{ +public: + using Expression = ExpressionClasses::Expression; + + Rules(); + + /// @returns a pointer to the first matching pattern and sets the match + /// groups accordingly. + std::pair<Pattern, std::function<Pattern()>> const* findFirstMatch( + Expression const& _expr, + ExpressionClasses const& _classes + ); + +private: + void addRules(std::vector<std::pair<Pattern, std::function<Pattern()>>> const& _rules); + void addRule(std::pair<Pattern, std::function<Pattern()>> const& _rule); + + void resetMatchGroups() { m_matchGroups.clear(); } + + std::map<unsigned, Expression const*> m_matchGroups; + std::vector<std::pair<Pattern, std::function<Pattern()>>> m_rules[256]; +}; + +/** + * Pattern to match against an expression. + * Also stores matched expressions to retrieve them later, for constructing new expressions using + * ExpressionTemplate. + */ +class Pattern +{ +public: + using Expression = ExpressionClasses::Expression; + using Id = ExpressionClasses::Id; + + // Matches a specific constant value. + Pattern(unsigned _value): Pattern(u256(_value)) {} + // Matches a specific constant value. + Pattern(u256 const& _value): m_type(Push), m_requireDataMatch(true), m_data(std::make_shared<u256>(_value)) {} + // Matches a specific assembly item type or anything if not given. + Pattern(AssemblyItemType _type = UndefinedItem): m_type(_type) {} + // Matches a given instruction with given arguments + Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments = {}); + /// Sets this pattern to be part of the match group with the identifier @a _group. + /// Inside one rule, all patterns in the same match group have to match expressions from the + /// same expression equivalence class. + void setMatchGroup(unsigned _group, std::map<unsigned, Expression const*>& _matchGroups); + unsigned matchGroup() const { return m_matchGroup; } + bool matches(Expression const& _expr, ExpressionClasses const& _classes) const; + + AssemblyItem toAssemblyItem(SourceLocation const& _location) const; + std::vector<Pattern> arguments() const { return m_arguments; } + + /// @returns the id of the matched expression if this pattern is part of a match group. + Id id() const { return matchGroupValue().id; } + /// @returns the data of the matched expression if this pattern is part of a match group. + u256 const& d() const { return matchGroupValue().item->data(); } + + std::string toString() const; + + AssemblyItemType type() const { return m_type; } + Instruction instruction() const + { + assertThrow(type() == Operation, OptimizerException, ""); + return m_instruction; + } + +private: + bool matchesBaseItem(AssemblyItem const* _item) const; + Expression const& matchGroupValue() const; + u256 const& data() const; + + AssemblyItemType m_type; + bool m_requireDataMatch = false; + Instruction m_instruction; ///< Only valid if m_type is Operation + std::shared_ptr<u256> m_data; ///< Only valid if m_type is not Operation + std::vector<Pattern> m_arguments; + unsigned m_matchGroup = 0; + std::map<unsigned, Expression const*>* m_matchGroups = nullptr; +}; + +/** + * Template for a new expression that can be built from matched patterns. + */ +struct ExpressionTemplate +{ + using Expression = ExpressionClasses::Expression; + using Id = ExpressionClasses::Id; + explicit ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location); + std::string toString() const; + bool hasId = false; + /// Id of the matched expression, if available. + Id id = Id(-1); + // Otherwise, assembly item. + AssemblyItem item = UndefinedItem; + std::vector<ExpressionTemplate> arguments; +}; + +} +} diff --git a/libsolidity/analysis/DeclarationContainer.cpp b/libsolidity/analysis/DeclarationContainer.cpp index 1599b83a..f8c12c5b 100644 --- a/libsolidity/analysis/DeclarationContainer.cpp +++ b/libsolidity/analysis/DeclarationContainer.cpp @@ -44,10 +44,19 @@ Declaration const* DeclarationContainer::conflictingDeclaration( if (dynamic_cast<FunctionDefinition const*>(&_declaration)) { - // check that all other declarations with the same name are functions + // check that all other declarations with the same name are functions or a public state variable for (Declaration const* declaration: declarations) - if (!dynamic_cast<FunctionDefinition const*>(declaration)) + { + if (dynamic_cast<FunctionDefinition const*>(declaration)) + continue; + if (auto variableDeclaration = dynamic_cast<VariableDeclaration const*>(declaration)) + { + if (variableDeclaration->isStateVariable() && !variableDeclaration->isConstant() && variableDeclaration->isPublic()) + continue; return declaration; + } + return declaration; + } } else if (declarations.size() == 1 && declarations.front() == &_declaration) return nullptr; diff --git a/libsolidity/analysis/NameAndTypeResolver.cpp b/libsolidity/analysis/NameAndTypeResolver.cpp index 2a33a501..08323243 100644 --- a/libsolidity/analysis/NameAndTypeResolver.cpp +++ b/libsolidity/analysis/NameAndTypeResolver.cpp @@ -260,10 +260,16 @@ vector<Declaration const*> NameAndTypeResolver::cleanedDeclarations( for (auto it = _declarations.begin(); it != _declarations.end(); ++it) { solAssert(*it, ""); - // the declaration is functionDefinition while declarations > 1 - FunctionDefinition const& functionDefinition = dynamic_cast<FunctionDefinition const&>(**it); - FunctionType functionType(functionDefinition); - for (auto parameter: functionType.parameterTypes() + functionType.returnParameterTypes()) + // the declaration is functionDefinition or a VariableDeclaration while declarations > 1 + solAssert(dynamic_cast<FunctionDefinition const*>(*it) || dynamic_cast<VariableDeclaration const*>(*it), + "Found overloading involving something not a function or a variable"); + + shared_ptr<FunctionType const> functionType { (*it)->functionType(false) }; + if (!functionType) + functionType = (*it)->functionType(true); + solAssert(functionType, "failed to determine the function type of the overloaded"); + + for (auto parameter: functionType->parameterTypes() + functionType->returnParameterTypes()) if (!parameter) reportFatalDeclarationError(_identifier.location(), "Function type can not be used in this context"); @@ -272,8 +278,10 @@ vector<Declaration const*> NameAndTypeResolver::cleanedDeclarations( uniqueFunctions.end(), [&](Declaration const* d) { - FunctionType newFunctionType(dynamic_cast<FunctionDefinition const&>(*d)); - return functionType.hasEqualArgumentTypes(newFunctionType); + shared_ptr<FunctionType const> newFunctionType { d->functionType(false) }; + if (!newFunctionType) + newFunctionType = d->functionType(true); + return newFunctionType && functionType->hasEqualArgumentTypes(*newFunctionType); } )) uniqueFunctions.push_back(*it); @@ -289,7 +297,39 @@ void NameAndTypeResolver::importInheritedScope(ContractDefinition const& _base) for (auto const& declaration: nameAndDeclaration.second) // Import if it was declared in the base, is not the constructor and is visible in derived classes if (declaration->scope() == &_base && declaration->isVisibleInDerivedContracts()) - m_currentScope->registerDeclaration(*declaration); + if (!m_currentScope->registerDeclaration(*declaration)) + { + SourceLocation firstDeclarationLocation; + SourceLocation secondDeclarationLocation; + Declaration const* conflictingDeclaration = m_currentScope->conflictingDeclaration(*declaration); + solAssert(conflictingDeclaration, ""); + + // Usual shadowing is not an error + if (dynamic_cast<VariableDeclaration const*>(declaration) && dynamic_cast<VariableDeclaration const*>(conflictingDeclaration)) + continue; + + // Usual shadowing is not an error + if (dynamic_cast<ModifierDefinition const*>(declaration) && dynamic_cast<ModifierDefinition const*>(conflictingDeclaration)) + continue; + + if (declaration->location().start < conflictingDeclaration->location().start) + { + firstDeclarationLocation = declaration->location(); + secondDeclarationLocation = conflictingDeclaration->location(); + } + else + { + firstDeclarationLocation = conflictingDeclaration->location(); + secondDeclarationLocation = declaration->location(); + } + + reportDeclarationError( + secondDeclarationLocation, + "Identifier already declared.", + firstDeclarationLocation, + "The previous declaration is here:" + ); + } } void NameAndTypeResolver::linearizeBaseContracts(ContractDefinition& _contract) diff --git a/libsolidity/analysis/TypeChecker.cpp b/libsolidity/analysis/TypeChecker.cpp index e414e27c..67c8ac17 100644 --- a/libsolidity/analysis/TypeChecker.cpp +++ b/libsolidity/analysis/TypeChecker.cpp @@ -1500,8 +1500,23 @@ bool TypeChecker::visit(Identifier const& _identifier) if (!annotation.referencedDeclaration) { if (!annotation.argumentTypes) - fatalTypeError(_identifier.location(), "Unable to determine overloaded type."); - if (annotation.overloadedDeclarations.empty()) + { + // The identifier should be a public state variable shadowing other functions + vector<Declaration const*> candidates; + + for (Declaration const* declaration: annotation.overloadedDeclarations) + { + if (VariableDeclaration const* variableDeclaration = dynamic_cast<decltype(variableDeclaration)>(declaration)) + candidates.push_back(declaration); + } + if (candidates.empty()) + fatalTypeError(_identifier.location(), "No matching declaration found after variable lookup."); + else if (candidates.size() == 1) + annotation.referencedDeclaration = candidates.front(); + else + fatalTypeError(_identifier.location(), "No unique declaration found after variable lookup."); + } + else if (annotation.overloadedDeclarations.empty()) fatalTypeError(_identifier.location(), "No candidates for overload resolution found."); else if (annotation.overloadedDeclarations.size() == 1) annotation.referencedDeclaration = *annotation.overloadedDeclarations.begin(); diff --git a/libsolidity/ast/AST.cpp b/libsolidity/ast/AST.cpp index 3cd1dfbe..6f7a64dc 100644 --- a/libsolidity/ast/AST.cpp +++ b/libsolidity/ast/AST.cpp @@ -217,6 +217,9 @@ vector<Declaration const*> const& ContractDefinition::inheritableMembers() const for (EnumDefinition const* e: definedEnums()) addInheritableMember(e); + + for (EventDefinition const* e: events()) + addInheritableMember(e); } return *m_inheritableMembers; } @@ -271,6 +274,45 @@ TypeDeclarationAnnotation& EnumDefinition::annotation() const return static_cast<TypeDeclarationAnnotation&>(*m_annotation); } +shared_ptr<FunctionType> FunctionDefinition::functionType(bool _internal) const +{ + if (_internal) + { + switch (visibility()) + { + case Declaration::Visibility::Default: + solAssert(false, "visibility() should not return Default"); + case Declaration::Visibility::Private: + case Declaration::Visibility::Internal: + case Declaration::Visibility::Public: + return make_shared<FunctionType>(*this, _internal); + case Declaration::Visibility::External: + return {}; + default: + solAssert(false, "visibility() should not return a Visibility"); + } + } + else + { + switch (visibility()) + { + case Declaration::Visibility::Default: + solAssert(false, "visibility() should not return Default"); + case Declaration::Visibility::Private: + case Declaration::Visibility::Internal: + return {}; + case Declaration::Visibility::Public: + case Declaration::Visibility::External: + return make_shared<FunctionType>(*this, _internal); + default: + solAssert(false, "visibility() should not return a Visibility"); + } + } + + // To make the compiler happy + return {}; +} + TypePointer FunctionDefinition::type() const { return make_shared<FunctionType>(*this); @@ -305,6 +347,14 @@ TypePointer EventDefinition::type() const return make_shared<FunctionType>(*this); } +std::shared_ptr<FunctionType> EventDefinition::functionType(bool _internal) const +{ + if (_internal) + return make_shared<FunctionType>(*this); + else + return {}; +} + EventDefinitionAnnotation& EventDefinition::annotation() const { if (!m_annotation) @@ -362,6 +412,28 @@ TypePointer VariableDeclaration::type() const return annotation().type; } +shared_ptr<FunctionType> VariableDeclaration::functionType(bool _internal) const +{ + if (_internal) + return {}; + switch (visibility()) + { + case Declaration::Visibility::Default: + solAssert(false, "visibility() should not return Default"); + case Declaration::Visibility::Private: + case Declaration::Visibility::Internal: + return {}; + case Declaration::Visibility::Public: + case Declaration::Visibility::External: + return make_shared<FunctionType>(*this); + default: + solAssert(false, "visibility() should not return a Visibility"); + } + + // To make the compiler happy + return {}; +} + VariableDeclarationAnnotation& VariableDeclaration::annotation() const { if (!m_annotation) diff --git a/libsolidity/ast/AST.h b/libsolidity/ast/AST.h index ab4be1ea..2d092408 100644 --- a/libsolidity/ast/AST.h +++ b/libsolidity/ast/AST.h @@ -171,6 +171,10 @@ public: /// This can only be called once types of variable declarations have already been resolved. virtual TypePointer type() const = 0; + /// @param _internal false indicates external interface is concerned, true indicates internal interface is concerned. + /// @returns null when it is not accessible as a function. + virtual std::shared_ptr<FunctionType> functionType(bool /*_internal*/) const { return {}; } + protected: virtual Visibility defaultVisibility() const { return Visibility::Public; } @@ -581,6 +585,10 @@ public: virtual TypePointer type() const override; + /// @param _internal false indicates external interface is concerned, true indicates internal interface is concerned. + /// @returns null when it is not accessible as a function. + virtual std::shared_ptr<FunctionType> functionType(bool /*_internal*/) const override; + virtual FunctionDefinitionAnnotation& annotation() const override; private: @@ -643,6 +651,10 @@ public: virtual TypePointer type() const override; + /// @param _internal false indicates external interface is concerned, true indicates internal interface is concerned. + /// @returns null when it is not accessible as a function. + virtual std::shared_ptr<FunctionType> functionType(bool /*_internal*/) const override; + virtual VariableDeclarationAnnotation& annotation() const override; protected: @@ -740,6 +752,7 @@ public: bool isAnonymous() const { return m_anonymous; } virtual TypePointer type() const override; + virtual std::shared_ptr<FunctionType> functionType(bool /*_internal*/) const override; virtual EventDefinitionAnnotation& annotation() const override; diff --git a/libsolidity/codegen/ExpressionCompiler.cpp b/libsolidity/codegen/ExpressionCompiler.cpp index a7fb8408..3922da88 100644 --- a/libsolidity/codegen/ExpressionCompiler.cpp +++ b/libsolidity/codegen/ExpressionCompiler.cpp @@ -908,19 +908,43 @@ bool ExpressionCompiler::visit(MemberAccess const& _memberAccess) solAssert(_memberAccess.annotation().type, "_memberAccess has no type"); if (auto funType = dynamic_cast<FunctionType const*>(_memberAccess.annotation().type.get())) { - if (funType->location() != FunctionType::Location::Internal) - { - _memberAccess.expression().accept(*this); - m_context << funType->externalIdentifier(); - } - else + switch (funType->location()) { + case FunctionType::Location::Internal: // We do not visit the expression here on purpose, because in the case of an // internal library function call, this would push the library address forcing // us to link against it although we actually do not need it. - auto const* function = dynamic_cast<FunctionDefinition const*>(_memberAccess.annotation().referencedDeclaration); - solAssert(!!function, "Function not found in member access"); - utils().pushCombinedFunctionEntryLabel(*function); + if (auto const* function = dynamic_cast<FunctionDefinition const*>(_memberAccess.annotation().referencedDeclaration)) + utils().pushCombinedFunctionEntryLabel(*function); + else + solAssert(false, "Function not found in member access"); + break; + case FunctionType::Location::Event: + if (!dynamic_cast<EventDefinition const*>(_memberAccess.annotation().referencedDeclaration)) + solAssert(false, "event not found"); + // no-op, because the parent node will do the job + break; + case FunctionType::Location::External: + case FunctionType::Location::Creation: + case FunctionType::Location::DelegateCall: + case FunctionType::Location::CallCode: + case FunctionType::Location::Send: + case FunctionType::Location::Bare: + case FunctionType::Location::BareCallCode: + case FunctionType::Location::BareDelegateCall: + _memberAccess.expression().accept(*this); + m_context << funType->externalIdentifier(); + break; + case FunctionType::Location::Log0: + case FunctionType::Location::Log1: + case FunctionType::Location::Log2: + case FunctionType::Location::Log3: + case FunctionType::Location::Log4: + case FunctionType::Location::ECRecover: + case FunctionType::Location::SHA256: + case FunctionType::Location::RIPEMD160: + default: + solAssert(false, "unsupported member function"); } } else if (dynamic_cast<TypeType const*>(_memberAccess.annotation().type.get())) diff --git a/libsolidity/interface/CompilerStack.cpp b/libsolidity/interface/CompilerStack.cpp index 4095844f..a31df584 100644 --- a/libsolidity/interface/CompilerStack.cpp +++ b/libsolidity/interface/CompilerStack.cpp @@ -509,23 +509,32 @@ string CompilerStack::applyRemapping(string const& _path, string const& _context }; size_t longestPrefix = 0; - string longestPrefixTarget; + size_t longestContext = 0; + string bestMatchTarget; + for (auto const& redir: m_remappings) { - // Skip if we already have a closer match. - if (longestPrefix > 0 && redir.prefix.length() <= longestPrefix) + string context = sanitizePath(redir.context); + string prefix = sanitizePath(redir.prefix); + + // Skip if current context is closer + if (context.length() < longestContext) continue; // Skip if redir.context is not a prefix of _context - if (!isPrefixOf(redir.context, _context)) + if (!isPrefixOf(context, _context)) + continue; + // Skip if we already have a closer prefix match. + if (prefix.length() < longestPrefix && context.length() == longestContext) continue; // Skip if the prefix does not match. - if (!isPrefixOf(redir.prefix, _path)) + if (!isPrefixOf(prefix, _path)) continue; - longestPrefix = redir.prefix.length(); - longestPrefixTarget = redir.target; + longestContext = context.length(); + longestPrefix = prefix.length(); + bestMatchTarget = sanitizePath(redir.target); } - string path = longestPrefixTarget; + string path = bestMatchTarget; path.append(_path.begin() + longestPrefix, _path.end()); return path; } @@ -593,11 +602,11 @@ bool CompilerStack::checkLibraryNameClashes() string CompilerStack::absolutePath(string const& _path, string const& _reference) const { - // Anything that does not start with `.` is an absolute path. - if (_path.empty() || _path.front() != '.') - return _path; using path = boost::filesystem::path; path p(_path); + // Anything that does not start with `.` is an absolute path. + if (p.begin() == p.end() || (*p.begin() != "." && *p.begin() != "..")) + return _path; path result(_reference); result.remove_filename(); for (path::iterator it = p.begin(); it != p.end(); ++it) diff --git a/libsolidity/interface/CompilerStack.h b/libsolidity/interface/CompilerStack.h index f98a457a..d49a8df1 100644 --- a/libsolidity/interface/CompilerStack.h +++ b/libsolidity/interface/CompilerStack.h @@ -29,6 +29,7 @@ #include <vector> #include <functional> #include <boost/noncopyable.hpp> +#include <boost/filesystem.hpp> #include <json/json.h> #include <libdevcore/Common.h> #include <libdevcore/FixedHash.h> @@ -234,12 +235,14 @@ private: bool checkLibraryNameClashes(); /// @returns the absolute path corresponding to @a _path relative to @a _reference. std::string absolutePath(std::string const& _path, std::string const& _reference) const; + /// Helper function to return path converted strings. + std::string sanitizePath(std::string const& _path) const { return boost::filesystem::path(_path).generic_string(); } + /// Compile a single contract and put the result in @a _compiledContracts. void compileContract( ContractDefinition const& _contract, std::map<ContractDefinition const*, eth::Assembly const*>& _compiledContracts ); - void link(); Contract const& contract(std::string const& _contractName = "") const; diff --git a/libsolidity/parsing/Parser.cpp b/libsolidity/parsing/Parser.cpp index f02a4a45..e26e2908 100644 --- a/libsolidity/parsing/Parser.cpp +++ b/libsolidity/parsing/Parser.cpp @@ -1153,9 +1153,9 @@ ASTPointer<Expression> Parser::parseLeftHandSideExpression( else if (m_scanner->currentToken() == Token::New) { expectToken(Token::New); - ASTPointer<TypeName> contractName(parseTypeName(false)); - nodeFactory.setEndPositionFromNode(contractName); - expression = nodeFactory.createNode<NewExpression>(contractName); + ASTPointer<TypeName> typeName(parseTypeName(false)); + nodeFactory.setEndPositionFromNode(typeName); + expression = nodeFactory.createNode<NewExpression>(typeName); } else expression = parsePrimaryExpression(); diff --git a/scripts/create_source_tarball.sh b/scripts/create_source_tarball.sh new file mode 100755 index 00000000..1f78e12c --- /dev/null +++ b/scripts/create_source_tarball.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env sh +# + +set -e + +REPO_ROOT="$(dirname "$0")"/.. +( + cd "$REPO_ROOT" + version=$(grep -oP "PROJECT_VERSION \"?\K[0-9.]+(?=\")"? CMakeLists.txt) + commithash=$(git rev-parse --short=8 HEAD) + commitdate=$(git show --format=%ci HEAD | head -n 1 | cut - -b1-10 | sed -e 's/-0?/./' | sed -e 's/-0?/./') + + # file exists and has zero size -> not a prerelease + if [ -e prerelease.txt -a ! -s prerelease.txt ] + then + versionstring="$version" + else + versionstring="$version-develop-$commitdate-$commithash" + fi + + TEMPDIR=$(mktemp -d) + SOLDIR="$TEMPDIR/solidity_$versionstring/" + mkdir "$SOLDIR" + # Store the current source + git checkout-index -a --prefix="$SOLDIR" + git submodule foreach 'git checkout-index -a --prefix="'"$SOLDIR"'/$path/"' + # Store the commit hash + echo "$commithash" > "$SOLDIR/commit_hash.txt" + # Add dependencies + mkdir -p "$SOLDIR/deps/downloads/" 2>/dev/null || true + wget -O "$SOLDIR/deps/downloads/jsoncpp-1.7.7.tar.gz" https://github.com/open-source-parsers/jsoncpp/archive/1.7.7.tar.gz + tar czf "$REPO_ROOT/solidity_$versionstring.tar.gz" -C "$TEMPDIR" "solidity_$versionstring" + rm -r "$TEMPDIR" +) diff --git a/scripts/install_cmake.sh b/scripts/install_cmake.sh new file mode 100755 index 00000000..00d013b9 --- /dev/null +++ b/scripts/install_cmake.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env sh + +# This script downloads the CMake binary and installs it in ~/.local directory +# (the cmake executable will be in ~/.local/bin). +# This is mostly suitable for CIs, not end users. + +set -e + +VERSION=3.7.1 +PREFIX=~/.local + +OS=$(uname -s) +case $OS in +Linux) SHA256=7b4b7a1d9f314f45722899c0521c261e4bfab4a6b532609e37fef391da6bade2;; +Darwin) SHA256=1851d1448964893fdc5a8c05863326119f397a3790e0c84c40b83499c7960267;; +esac + + +BIN=$PREFIX/bin + +if test -f $BIN/cmake && ($BIN/cmake --version | grep -q "$VERSION"); then + echo "CMake $VERSION already installed in $BIN" +else + FILE=cmake-$VERSION-$OS-x86_64.tar.gz + URL=https://cmake.org/files/v3.7/$FILE + ERROR=0 + TMPFILE=$(mktemp --tmpdir cmake-$VERSION-$OS-x86_64.XXXXXXXX.tar.gz) + echo "Downloading CMake ($URL)..." + wget "$URL" -O "$TMPFILE" -nv + if ! (shasum -a256 "$TMPFILE" | grep -q "$SHA256"); then + echo "Checksum mismatch ($TMPFILE)" + exit 1 + fi + mkdir -p "$PREFIX" + tar xzf "$TMPFILE" -C "$PREFIX" --strip 1 + rm $TMPFILE +fi diff --git a/test/libsolidity/Imports.cpp b/test/libsolidity/Imports.cpp index bc6adc26..56895fdc 100644 --- a/test/libsolidity/Imports.cpp +++ b/test/libsolidity/Imports.cpp @@ -164,6 +164,43 @@ BOOST_AUTO_TEST_CASE(context_dependent_remappings) BOOST_CHECK(c.compile()); } +BOOST_AUTO_TEST_CASE(filename_with_period) +{ + CompilerStack c; + c.addSource("a/a.sol", "import \".b.sol\"; contract A is B {} pragma solidity >=0.0;"); + c.addSource("a/.b.sol", "contract B {} pragma solidity >=0.0;"); + BOOST_CHECK(!c.compile()); +} + +BOOST_AUTO_TEST_CASE(context_dependent_remappings_ensure_default_and_module_preserved) +{ + CompilerStack c; + c.setRemappings(vector<string>{"foo=vendor/foo_2.0.0", "vendor/bar:foo=vendor/foo_1.0.0", "bar=vendor/bar"}); + c.addSource("main.sol", "import \"foo/foo.sol\"; import {Bar} from \"bar/bar.sol\"; contract Main is Foo2, Bar {} pragma solidity >=0.0;"); + c.addSource("vendor/bar/bar.sol", "import \"foo/foo.sol\"; contract Bar {Foo1 foo;} pragma solidity >=0.0;"); + c.addSource("vendor/foo_1.0.0/foo.sol", "contract Foo1 {} pragma solidity >=0.0;"); + c.addSource("vendor/foo_2.0.0/foo.sol", "contract Foo2 {} pragma solidity >=0.0;"); + BOOST_CHECK(c.compile()); +} + +BOOST_AUTO_TEST_CASE(context_dependent_remappings_order_independent) +{ + CompilerStack c; + c.setRemappings(vector<string>{"a:x/y/z=d", "a/b:x=e"}); + c.addSource("a/main.sol", "import \"x/y/z/z.sol\"; contract Main is D {} pragma solidity >=0.0;"); + c.addSource("a/b/main.sol", "import \"x/y/z/z.sol\"; contract Main is E {} pragma solidity >=0.0;"); + c.addSource("d/z.sol", "contract D {} pragma solidity >=0.0;"); + c.addSource("e/y/z/z.sol", "contract E {} pragma solidity >=0.0;"); + BOOST_CHECK(c.compile()); + CompilerStack d; + d.setRemappings(vector<string>{"a/b:x=e", "a:x/y/z=d"}); + d.addSource("a/main.sol", "import \"x/y/z/z.sol\"; contract Main is D {} pragma solidity >=0.0;"); + d.addSource("a/b/main.sol", "import \"x/y/z/z.sol\"; contract Main is E {} pragma solidity >=0.0;"); + d.addSource("d/z.sol", "contract D {} pragma solidity >=0.0;"); + d.addSource("e/y/z/z.sol", "contract E {} pragma solidity >=0.0;"); + BOOST_CHECK(d.compile()); +} + BOOST_AUTO_TEST_SUITE_END() } diff --git a/test/libsolidity/SolidityEndToEndTest.cpp b/test/libsolidity/SolidityEndToEndTest.cpp index 94d4fb7f..19161831 100644 --- a/test/libsolidity/SolidityEndToEndTest.cpp +++ b/test/libsolidity/SolidityEndToEndTest.cpp @@ -2770,6 +2770,28 @@ BOOST_AUTO_TEST_CASE(event_no_arguments) BOOST_CHECK_EQUAL(m_logs[0].topics[0], dev::keccak256(string("Deposit()"))); } +BOOST_AUTO_TEST_CASE(event_access_through_base_name) +{ + char const* sourceCode = R"( + contract A { + event x(); + } + contract B is A { + function f() returns (uint) { + A.x(); + return 1; + } + } + )"; + compileAndRun(sourceCode); + callContractFunction("f()"); + BOOST_REQUIRE_EQUAL(m_logs.size(), 1); + BOOST_CHECK_EQUAL(m_logs[0].address, m_contractAddress); + BOOST_CHECK(m_logs[0].data.empty()); + BOOST_REQUIRE_EQUAL(m_logs[0].topics.size(), 1); + BOOST_CHECK_EQUAL(m_logs[0].topics[0], dev::keccak256(string("x()"))); +} + BOOST_AUTO_TEST_CASE(event_anonymous) { char const* sourceCode = R"( @@ -4847,60 +4869,6 @@ BOOST_AUTO_TEST_CASE(proper_order_of_overwriting_of_attributes) BOOST_CHECK(callContractFunction("ok()") == encodeArgs(false)); } -BOOST_AUTO_TEST_CASE(proper_overwriting_accessor_by_function) -{ - // bug #1798 - char const* sourceCode = R"( - contract attribute { - bool ok = false; - } - contract func { - function ok() returns (bool) { return true; } - } - - contract attr_func is attribute, func { - function checkOk() returns (bool) { return ok(); } - } - contract func_attr is func, attribute { - function checkOk() returns (bool) { return ok; } - } - )"; - compileAndRun(sourceCode, 0, "attr_func"); - BOOST_CHECK(callContractFunction("ok()") == encodeArgs(true)); - compileAndRun(sourceCode, 0, "func_attr"); - BOOST_CHECK(callContractFunction("checkOk()") == encodeArgs(false)); -} - - -BOOST_AUTO_TEST_CASE(overwriting_inheritance) -{ - // bug #1798 - char const* sourceCode = R"( - contract A { - function ok() returns (uint) { return 1; } - } - contract B { - function ok() returns (uint) { return 2; } - } - contract C { - uint ok = 6; - } - contract AB is A, B { - function ok() returns (uint) { return 4; } - } - contract reversedE is C, AB { - function checkOk() returns (uint) { return ok(); } - } - contract E is AB, C { - function checkOk() returns (uint) { return ok; } - } - )"; - compileAndRun(sourceCode, 0, "reversedE"); - BOOST_CHECK(callContractFunction("checkOk()") == encodeArgs(4)); - compileAndRun(sourceCode, 0, "E"); - BOOST_CHECK(callContractFunction("checkOk()") == encodeArgs(6)); -} - BOOST_AUTO_TEST_CASE(struct_assign_reference_to_struct) { char const* sourceCode = R"( diff --git a/test/libsolidity/SolidityNameAndTypeResolution.cpp b/test/libsolidity/SolidityNameAndTypeResolution.cpp index 576421fd..9f6ea2b3 100644 --- a/test/libsolidity/SolidityNameAndTypeResolution.cpp +++ b/test/libsolidity/SolidityNameAndTypeResolution.cpp @@ -1056,7 +1056,9 @@ BOOST_AUTO_TEST_CASE(modifier_overrides_function) contract A { modifier mod(uint a) { _; } } contract B is A { function mod(uint a) { } } )"; - CHECK_ERROR(text, TypeError, ""); + // Error: Identifier already declared. + // Error: Override changes modifier to function. + CHECK_ERROR_ALLOW_MULTI(text, DeclarationError, ""); } BOOST_AUTO_TEST_CASE(function_overrides_modifier) @@ -1065,7 +1067,9 @@ BOOST_AUTO_TEST_CASE(function_overrides_modifier) contract A { function mod(uint a) { } } contract B is A { modifier mod(uint a) { _; } } )"; - CHECK_ERROR(text, TypeError, ""); + // Error: Identifier already declared. + // Error: Override changes function to modifier. + CHECK_ERROR_ALLOW_MULTI(text, DeclarationError, ""); } BOOST_AUTO_TEST_CASE(modifier_returns_value) @@ -4304,6 +4308,25 @@ BOOST_AUTO_TEST_CASE(illegal_override_payable_nonpayable) CHECK_ERROR(text, TypeError, ""); } +BOOST_AUTO_TEST_CASE(function_variable_mixin) +{ + // bug #1798 (cpp-ethereum), related to #1286 (solidity) + char const* text = R"( + contract attribute { + bool ok = false; + } + contract func { + function ok() returns (bool) { return true; } + } + + contract attr_func is attribute, func { + function checkOk() returns (bool) { return ok(); } + } + )"; + CHECK_ERROR(text, DeclarationError, ""); +} + + BOOST_AUTO_TEST_CASE(payable_constant_conflict) { char const* text = R"( |