cmd/evm, core/asm: add EVM assembler (#3686)

The evm compile command implements a simple assembly language that compiles to EVM bytecode.
author: Jeffrey Wilcke <jeffrey@ethereum.org> 2017-03-01 08:11:24 +0800
committer: Felix Lange <fjl@users.noreply.github.com> 2017-03-01 08:11:24 +0800
commit: 230cf2ec9142b6a8f421cb8873deb5df1566e89c (patch)
tree: b437803c8d90384d8ecb68e8a180db384535c018 /core
parent: 7ff75ac2f2f8a62d52367271194aa7889a5b5c88 (diff)
download: dexon-230cf2ec9142b6a8f421cb8873deb5df1566e89c.tar.gz
dexon-230cf2ec9142b6a8f421cb8873deb5df1566e89c.tar.zst
dexon-230cf2ec9142b6a8f421cb8873deb5df1566e89c.zip
4 files changed, 610 insertions, 18 deletions
diff --git a/core/asm/compiler.go b/core/asm/compiler.go
new file mode 100644
index 000000000..b2c85375c
--- /dev/null
+++ b/core/asm/compiler.go
@@ -0,0 +1,281 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package asm
+
+import (
+	"errors"
+	"fmt"
+	"math/big"
+	"os"
+	"strings"
+
+	"github.com/ethereum/go-ethereum/common/math"
+	"github.com/ethereum/go-ethereum/core/vm"
+)
+
+// Compiler contains information about the parsed source
+// and holds the tokens for the program.
+type Compiler struct {
+	tokens []token
+	binary []interface{}
+
+	labels map[string]int
+
+	pc, pos int
+
+	debug bool
+}
+
+// newCompiler returns a new allocated compiler.
+func NewCompiler(debug bool) *Compiler {
+	return &Compiler{
+		labels: make(map[string]int),
+		debug:  debug,
+	}
+}
+
+// Feed feeds tokens in to ch and are interpreted by
+// the compiler.
+//
+// feed is the first pass in the compile stage as it
+// collect the used labels in the program and keeps a
+// program counter which is used to determine the locations
+// of the jump dests. The labels can than be used in the
+// second stage to push labels and determine the right
+// position.
+func (c *Compiler) Feed(ch <-chan token) {
+	for i := range ch {
+		switch i.typ {
+		case number:
+			num := math.MustParseBig256(i.text).Bytes()
+			if len(num) == 0 {
+				num = []byte{0}
+			}
+			c.pc += len(num)
+		case stringValue:
+			c.pc += len(i.text) - 2
+		case element:
+			c.pc++
+		case labelDef:
+			c.labels[i.text] = c.pc
+			c.pc++
+		case label:
+			c.pc += 5
+		}
+
+		c.tokens = append(c.tokens, i)
+	}
+	if c.debug {
+		fmt.Fprintln(os.Stderr, "found", len(c.labels), "labels")
+	}
+}
+
+// Compile compiles the current tokens and returns a
+// binary string that can be interpreted by the EVM
+// and an error if it failed.
+//
+// compile is the second stage in the compile phase
+// which compiles the tokens to EVM instructions.
+func (c *Compiler) Compile() (string, []error) {
+	var errors []error
+	// continue looping over the tokens until
+	// the stack has been exhausted.
+	for c.pos < len(c.tokens) {
+		if err := c.compileLine(); err != nil {
+			errors = append(errors, err)
+		}
+	}
+
+	// turn the binary to hex
+	var bin string
+	for _, v := range c.binary {
+		switch v := v.(type) {
+		case vm.OpCode:
+			bin += fmt.Sprintf("%x", []byte{byte(v)})
+		case []byte:
+			bin += fmt.Sprintf("%x", v)
+		}
+	}
+	return bin, errors
+}
+
+// next returns the next token and increments the
+// posititon.
+func (c *Compiler) next() token {
+	token := c.tokens[c.pos]
+	c.pos++
+	return token
+}
+
+// compile line compiles a single line instruction e.g.
+// "push 1", "jump @labal".
+func (c *Compiler) compileLine() error {
+	n := c.next()
+	if n.typ != lineStart {
+		return compileErr(n, n.typ.String(), lineStart.String())
+	}
+
+	lvalue := c.next()
+	switch lvalue.typ {
+	case eof:
+		return nil
+	case element:
+		if err := c.compileElement(lvalue); err != nil {
+			return err
+		}
+	case labelDef:
+		c.compileLabel()
+	case lineEnd:
+		return nil
+	default:
+		return compileErr(lvalue, lvalue.text, fmt.Sprintf("%v or %v", labelDef, element))
+	}
+
+	if n := c.next(); n.typ != lineEnd {
+		return compileErr(n, n.text, lineEnd.String())
+	}
+
+	return nil
+}
+
+// compileNumber compiles the number to bytes
+func (c *Compiler) compileNumber(element token) (int, error) {
+	num := math.MustParseBig256(element.text).Bytes()
+	if len(num) == 0 {
+		num = []byte{0}
+	}
+	c.pushBin(num)
+	return len(num), nil
+}
+
+// compileElement compiles the element (push & label or both)
+// to a binary representation and may error if incorrect statements
+// where fed.
+func (c *Compiler) compileElement(element token) error {
+	// check for a jump. jumps must be read and compiled
+	// from right to left.
+	if isJump(element.text) {
+		rvalue := c.next()
+		switch rvalue.typ {
+		case number:
+			// TODO figure out how to return the error properly
+			c.compileNumber(rvalue)
+		case stringValue:
+			// strings are quoted, remove them.
+			c.pushBin(rvalue.text[1 : len(rvalue.text)-2])
+		case label:
+			c.pushBin(vm.PUSH4)
+			pos := big.NewInt(int64(c.labels[rvalue.text])).Bytes()
+			pos = append(make([]byte, 4-len(pos)), pos...)
+			c.pushBin(pos)
+		default:
+			return compileErr(rvalue, rvalue.text, "number, string or label")
+		}
+		// push the operation
+		c.pushBin(toBinary(element.text))
+		return nil
+	} else if isPush(element.text) {
+		// handle pushes. pushes are read from left to right.
+		var value []byte
+
+		rvalue := c.next()
+		switch rvalue.typ {
+		case number:
+			value = math.MustParseBig256(rvalue.text).Bytes()
+			if len(value) == 0 {
+				value = []byte{0}
+			}
+		case stringValue:
+			value = []byte(rvalue.text[1 : len(rvalue.text)-1])
+		case label:
+			value = make([]byte, 4)
+			copy(value, big.NewInt(int64(c.labels[rvalue.text])).Bytes())
+		default:
+			return compileErr(rvalue, rvalue.text, "number, string or label")
+		}
+
+		if len(value) > 32 {
+			return fmt.Errorf("%d type error: unsupported string or number with size > 32", rvalue.lineno)
+		}
+
+		c.pushBin(vm.OpCode(int(vm.PUSH1) - 1 + len(value)))
+		c.pushBin(value)
+	} else {
+		c.pushBin(toBinary(element.text))
+	}
+
+	return nil
+}
+
+// compileLabel pushes a jumpdest to the binary slice.
+func (c *Compiler) compileLabel() {
+	c.pushBin(vm.JUMPDEST)
+}
+
+// pushBin pushes the value v to the binary stack.
+func (c *Compiler) pushBin(v interface{}) {
+	if c.debug {
+		fmt.Printf("%d: %v\n", len(c.binary), v)
+	}
+	c.binary = append(c.binary, v)
+}
+
+// isPush returns whether the string op is either any of
+// push(N).
+func isPush(op string) bool {
+	if op == "push" {
+		return true
+	}
+	return false
+}
+
+// isJump returns whether the string op is jump(i)
+func isJump(op string) bool {
+	return op == "jumpi" || op == "jump"
+}
+
+// toBinary converts text to a vm.OpCode
+func toBinary(text string) vm.OpCode {
+	if isPush(text) {
+		text = "push1"
+	}
+	return vm.StringToOp(strings.ToUpper(text))
+}
+
+type compileError struct {
+	got  string
+	want string
+
+	lineno int
+}
+
+func (err compileError) Error() string {
+	return fmt.Sprintf("%d syntax error: unexpected %v, expected %v", err.lineno, err.got, err.want)
+}
+
+var (
+	errExpBol            = errors.New("expected beginning of line")
+	errExpElementOrLabel = errors.New("expected beginning of line")
+)
+
+func compileErr(c token, got, want string) error {
+	return compileError{
+		got:    got,
+		want:   want,
+		lineno: c.lineno,
+	}
+}
diff --git a/core/asm/lex_test.go b/core/asm/lex_test.go
new file mode 100644
index 000000000..36e67bcf7
--- /dev/null
+++ b/core/asm/lex_test.go
@@ -0,0 +1,36 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package asm
+
+import "testing"
+
+func lexAll(src string) []token {
+	ch := Lex("test.asm", []byte(src), false)
+
+	var tokens []token
+	for i := range ch {
+		tokens = append(tokens, i)
+	}
+	return tokens
+}
+
+func TestComment(t *testing.T) {
+	tokens := lexAll(";; this is a comment")
+	if len(tokens) != 2 { // {new line, EOF}
+		t.Error("expected no tokens")
+	}
+}
diff --git a/core/asm/lexer.go b/core/asm/lexer.go
new file mode 100644
index 000000000..2770bd35f
--- /dev/null
+++ b/core/asm/lexer.go
@@ -0,0 +1,291 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package asm
+
+import (
+	"fmt"
+	"os"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+)
+
+// stateFn is used through the lifetime of the
+// lexer to parse the different values at the
+// current state.
+type stateFn func(*lexer) stateFn
+
+// token is emitted when the lexer has discovered
+// a new parsable token. These are delivered over
+// the tokens channels of the lexer
+type token struct {
+	typ    tokenType
+	lineno int
+	text   string
+}
+
+// tokenType are the different types the lexer
+// is able to parse and return.
+type tokenType int
+
+const (
+	eof              tokenType = iota // end of file
+	lineStart                         // emitted when a line starts
+	lineEnd                           // emitted when a line ends
+	invalidStatement                  // any invalid statement
+	element                           // any element during element parsing
+	label                             // label is emitted when a labal is found
+	labelDef                          // label definition is emitted when a new label is found
+	number                            // number is emitted when a number is found
+	stringValue                       // stringValue is emitted when a string has been found
+
+	Numbers            = "1234567890"                                           // characters representing any decimal number
+	HexadecimalNumbers = Numbers + "aAbBcCdDeEfF"                               // characters representing any hexadecimal
+	Alpha              = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // characters representing alphanumeric
+)
+
+// String implements stringer
+func (it tokenType) String() string {
+	if int(it) > len(stringtokenTypes) {
+		return "invalid"
+	}
+	return stringtokenTypes[it]
+}
+
+var stringtokenTypes = []string{
+	eof:              "EOF",
+	invalidStatement: "invalid statement",
+	element:          "element",
+	lineEnd:          "end of line",
+	lineStart:        "new line",
+	label:            "label",
+	labelDef:         "label definition",
+	number:           "number",
+	stringValue:      "string",
+}
+
+// lexer is the basic construct for parsing
+// source code and turning them in to tokens.
+// Tokens are interpreted by the compiler.
+type lexer struct {
+	input string // input contains the source code of the program
+
+	tokens chan token // tokens is used to deliver tokens to the listener
+	state  stateFn    // the current state function
+
+	lineno            int // current line number in the source file
+	start, pos, width int // positions for lexing and returning value
+
+	debug bool // flag for triggering debug output
+}
+
+// lex lexes the program by name with the given source. It returns a
+// channel on which the tokens are delivered.
+func Lex(name string, source []byte, debug bool) <-chan token {
+	ch := make(chan token)
+	l := &lexer{
+		input:  string(source),
+		tokens: ch,
+		state:  lexLine,
+		debug:  debug,
+	}
+	go func() {
+		l.emit(lineStart)
+		for l.state != nil {
+			l.state = l.state(l)
+		}
+		l.emit(eof)
+		close(l.tokens)
+	}()
+
+	return ch
+}
+
+// next returns the next rune in the program's source.
+func (l *lexer) next() (rune rune) {
+	if l.pos >= len(l.input) {
+		l.width = 0
+		return 0
+	}
+	rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
+	l.pos += l.width
+	return rune
+}
+
+// backup backsup the last parsed element (multi-character)
+func (l *lexer) backup() {
+	l.pos -= l.width
+}
+
+// peek returns the next rune but does not advance the seeker
+func (l *lexer) peek() rune {
+	r := l.next()
+	l.backup()
+	return r
+}
+
+// ignore advances the seeker and ignores the value
+func (l *lexer) ignore() {
+	l.start = l.pos
+}
+
+// Accepts checks whether the given input matches the next rune
+func (l *lexer) accept(valid string) bool {
+	if strings.IndexRune(valid, l.next()) >= 0 {
+		return true
+	}
+
+	l.backup()
+
+	return false
+}
+
+// acceptRun will continue to advance the seeker until valid
+// can no longer be met.
+func (l *lexer) acceptRun(valid string) {
+	for strings.IndexRune(valid, l.next()) >= 0 {
+	}
+	l.backup()
+}
+
+// acceptRunUntil is the inverse of acceptRun and will continue
+// to advance the seeker until the rune has been found.
+func (l *lexer) acceptRunUntil(until rune) bool {
+	// Continues running until a rune is found
+	for i := l.next(); strings.IndexRune(string(until), i) == -1; i = l.next() {
+		if i == 0 {
+			return false
+		}
+	}
+
+	return true
+}
+
+// blob returns the current value
+func (l *lexer) blob() string {
+	return l.input[l.start:l.pos]
+}
+
+// Emits a new token on to token channel for processing
+func (l *lexer) emit(t tokenType) {
+	token := token{t, l.lineno, l.blob()}
+
+	if l.debug {
+		fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text)
+	}
+
+	l.tokens <- token
+	l.start = l.pos
+}
+
+// lexLine is state function for lexing lines
+func lexLine(l *lexer) stateFn {
+	for {
+		switch r := l.next(); {
+		case r == '\n':
+			l.emit(lineEnd)
+			l.ignore()
+			l.lineno++
+
+			l.emit(lineStart)
+		case r == ';' && l.peek() == ';':
+			return lexComment
+		case isSpace(r):
+			l.ignore()
+		case isAlphaNumeric(r) || r == '_':
+			return lexElement
+		case isNumber(r):
+			return lexNumber
+		case r == '@':
+			l.ignore()
+			return lexLabel
+		case r == '"':
+			return lexInsideString
+		default:
+			return nil
+		}
+	}
+}
+
+// lexComment parses the current position until the end
+// of the line and discards the text.
+func lexComment(l *lexer) stateFn {
+	l.acceptRunUntil('\n')
+	l.ignore()
+
+	return lexLine
+}
+
+// lexLabel parses the current label, emits and returns
+// the lex text state function to advance the parsing
+// process.
+func lexLabel(l *lexer) stateFn {
+	l.acceptRun(Alpha + "_")
+
+	l.emit(label)
+
+	return lexLine
+}
+
+// lexInsideString lexes the inside of a string until
+// until the state function finds the closing quote.
+// It returns the lex text state function.
+func lexInsideString(l *lexer) stateFn {
+	if l.acceptRunUntil('"') {
+		l.emit(stringValue)
+	}
+
+	return lexLine
+}
+
+func lexNumber(l *lexer) stateFn {
+	acceptance := Numbers
+	if l.accept("0") && l.accept("xX") {
+		acceptance = HexadecimalNumbers
+	}
+	l.acceptRun(acceptance)
+
+	l.emit(number)
+
+	return lexLine
+}
+
+func lexElement(l *lexer) stateFn {
+	l.acceptRun(Alpha + "_" + Numbers)
+
+	if l.peek() == ':' {
+		l.emit(labelDef)
+
+		l.accept(":")
+		l.ignore()
+	} else {
+		l.emit(element)
+	}
+	return lexLine
+}
+
+func isAlphaNumeric(t rune) bool {
+	return unicode.IsLetter(t)
+}
+
+func isSpace(t rune) bool {
+	return unicode.IsSpace(t)
+}
+
+func isNumber(t rune) bool {
+	return unicode.IsNumber(t)
+}
diff --git a/core/vm/logger.go b/core/vm/logger.go
index 102a215c7..ccdf21a4b 100644
--- a/core/vm/logger.go
+++ b/core/vm/logger.go
@@ -17,10 +17,10 @@
 package vm
 
 import (
+	"encoding/hex"
 	"fmt"
 	"math/big"
 	"os"
-	"unicode"
 
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/common/math"
@@ -184,24 +184,8 @@ func StdErrFormat(logs []StructLog) {
 			fmt.Fprintf(os.Stderr, "%04d: %x\n", len(log.Stack)-i-1, math.PaddedBigBytes(log.Stack[i], 32))
 		}
 
-		const maxMem = 10
-		addr := 0
 		fmt.Fprintln(os.Stderr, "MEM =", len(log.Memory))
-		for i := 0; i+16 <= len(log.Memory) && addr < maxMem; i += 16 {
-			data := log.Memory[i : i+16]
-			str := fmt.Sprintf("%04d: % x  ", addr*16, data)
-			for _, r := range data {
-				if r == 0 {
-					str += "."
-				} else if unicode.IsPrint(rune(r)) {
-					str += fmt.Sprintf("%s", string(r))
-				} else {
-					str += "?"
-				}
-			}
-			addr++
-			fmt.Fprintln(os.Stderr, str)
-		}
+		fmt.Fprintln(os.Stderr, hex.Dump(log.Memory))
 
 		fmt.Fprintln(os.Stderr, "STORAGE =", len(log.Storage))
 		for h, item := range log.Storage {
author	Jeffrey Wilcke <jeffrey@ethereum.org>	2017-03-01 08:11:24 +0800
committer	Felix Lange <fjl@users.noreply.github.com>	2017-03-01 08:11:24 +0800
commit	230cf2ec9142b6a8f421cb8873deb5df1566e89c (patch)
tree	b437803c8d90384d8ecb68e8a180db384535c018 /core
parent	7ff75ac2f2f8a62d52367271194aa7889a5b5c88 (diff)
download	dexon-230cf2ec9142b6a8f421cb8873deb5df1566e89c.tar.gz dexon-230cf2ec9142b6a8f421cb8873deb5df1566e89c.tar.zst dexon-230cf2ec9142b6a8f421cb8873deb5df1566e89c.zip