Diffstat (limited to 'core/asm/lexer.go')
-rw-r--r-- | core/asm/lexer.go | 291 |
1 file changed, 291 insertions, 0 deletions
diff --git a/core/asm/lexer.go b/core/asm/lexer.go
new file mode 100644
index 000000000..2770bd35f
--- /dev/null
+++ b/core/asm/lexer.go
@@ -0,0 +1,291 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package asm
+
+import (
+	"fmt"
+	"os"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+)
+
+// stateFn is used throughout the lifetime of the
+// lexer to parse the different values at the
+// current state.
+type stateFn func(*lexer) stateFn
+
+// token is emitted when the lexer has discovered
+// a new parsable token. These are delivered over
+// the tokens channel of the lexer.
+type token struct {
+	typ    tokenType
+	lineno int
+	text   string
+}
+
+// tokenType is the set of different types the lexer
+// is able to parse and return.
+type tokenType int
+
+const (
+	eof              tokenType = iota // end of file
+	lineStart                         // emitted when a line starts
+	lineEnd                           // emitted when a line ends
+	invalidStatement                  // any invalid statement
+	element                           // any element during element parsing
+	label                             // label is emitted when a label is found
+	labelDef                          // label definition is emitted when a new label is found
+	number                            // number is emitted when a number is found
+	stringValue                       // stringValue is emitted when a string has been found
+
+	Numbers            = "1234567890"                                           // characters representing any decimal number
+	HexadecimalNumbers = Numbers + "aAbBcCdDeEfF"                               // characters representing any hexadecimal number
+	Alpha              = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // characters representing any alphabetical character
+)
+
+// String implements stringer.
+func (it tokenType) String() string {
+	if int(it) >= len(stringtokenTypes) {
+		return "invalid"
+	}
+	return stringtokenTypes[it]
+}
+
+var stringtokenTypes = []string{
+	eof:              "EOF",
+	invalidStatement: "invalid statement",
+	element:          "element",
+	lineEnd:          "end of line",
+	lineStart:        "new line",
+	label:            "label",
+	labelDef:         "label definition",
+	number:           "number",
+	stringValue:      "string",
+}
+
+// lexer is the basic construct for parsing
+// source code and turning it into tokens.
+// Tokens are interpreted by the compiler.
+type lexer struct {
+	input string // input contains the source code of the program
+
+	tokens chan token // tokens is used to deliver tokens to the listener
+	state  stateFn    // the current state function
+
+	lineno            int // current line number in the source file
+	start, pos, width int // positions for lexing and returning value
+
+	debug bool // flag for triggering debug output
+}
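The stateFn type and the tokens channel above describe a state-function lexer: each state function returns the next state, and tokens are streamed to the consumer over a channel while lexing is still in progress. Below is a minimal, self-contained sketch of that pattern under assumed names (toyLexer, toyState and lexWord are illustrative only and not part of core/asm):

package main

import (
	"fmt"
	"strings"
)

// toyLexer and toyState are hypothetical types for this sketch; they mirror
// lexer and stateFn above in the simplest possible form.
type toyLexer struct {
	input string
	pos   int
	out   chan string
}

// toyState mirrors stateFn: every state returns the next state, or nil when
// there is nothing left to lex.
type toyState func(*toyLexer) toyState

// lexWord emits whitespace-separated words one at a time.
func lexWord(l *toyLexer) toyState {
	if l.pos >= len(l.input) {
		return nil
	}
	if end := strings.IndexByte(l.input[l.pos:], ' '); end >= 0 {
		l.out <- l.input[l.pos : l.pos+end]
		l.pos += end + 1
		return lexWord
	}
	l.out <- l.input[l.pos:]
	l.pos = len(l.input)
	return nil
}

func main() {
	l := &toyLexer{input: "PUSH 0x1 ADD", out: make(chan string)}
	// Run state functions until one returns nil, then close the channel.
	go func() {
		for state := toyState(lexWord); state != nil; state = state(l) {
		}
		close(l.out)
	}()
	for w := range l.out {
		fmt.Println(w)
	}
}

The same driving loop appears in Lex below: a goroutine keeps calling the current state function until one of them returns nil, then closes the token channel.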
+
+// Lex lexes the program by name with the given source. It returns a
+// channel on which the tokens are delivered.
+func Lex(name string, source []byte, debug bool) <-chan token {
+	ch := make(chan token)
+	l := &lexer{
+		input:  string(source),
+		tokens: ch,
+		state:  lexLine,
+		debug:  debug,
+	}
+	go func() {
+		l.emit(lineStart)
+		for l.state != nil {
+			l.state = l.state(l)
+		}
+		l.emit(eof)
+		close(l.tokens)
+	}()
+
+	return ch
+}
+
+// next returns the next rune in the program's source.
+func (l *lexer) next() (rune rune) {
+	if l.pos >= len(l.input) {
+		l.width = 0
+		return 0
+	}
+	rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
+	l.pos += l.width
+	return rune
+}
+
+// backup backs up over the last parsed rune (multi-character aware).
+func (l *lexer) backup() {
+	l.pos -= l.width
+}
+
+// peek returns the next rune but does not advance the seeker.
+func (l *lexer) peek() rune {
+	r := l.next()
+	l.backup()
+	return r
+}
+
+// ignore discards any pending input by moving start up to the current position.
+func (l *lexer) ignore() {
+	l.start = l.pos
+}
+
+// accept checks whether the next rune is contained in the given valid set.
+func (l *lexer) accept(valid string) bool {
+	if strings.IndexRune(valid, l.next()) >= 0 {
+		return true
+	}
+
+	l.backup()
+
+	return false
+}
+
+// acceptRun will continue to advance the seeker until valid
+// can no longer be met.
+func (l *lexer) acceptRun(valid string) {
+	for strings.IndexRune(valid, l.next()) >= 0 {
+	}
+	l.backup()
+}
+
+// acceptRunUntil is the inverse of acceptRun and will continue
+// to advance the seeker until the rune has been found.
+func (l *lexer) acceptRunUntil(until rune) bool {
+	// Continues running until a rune is found
+	for i := l.next(); strings.IndexRune(string(until), i) == -1; i = l.next() {
+		if i == 0 {
+			return false
+		}
+	}
+
+	return true
+}
+
+// blob returns the current value.
+func (l *lexer) blob() string {
+	return l.input[l.start:l.pos]
+}
+
+// emit emits a new token onto the token channel for processing.
+func (l *lexer) emit(t tokenType) {
+	token := token{t, l.lineno, l.blob()}
+
+	if l.debug {
+		fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text)
+	}
+
+	l.tokens <- token
+	l.start = l.pos
+}
+
+// lexLine is the state function for lexing lines.
+func lexLine(l *lexer) stateFn {
+	for {
+		switch r := l.next(); {
+		case r == '\n':
+			l.emit(lineEnd)
+			l.ignore()
+			l.lineno++
+
+			l.emit(lineStart)
+		case r == ';' && l.peek() == ';':
+			return lexComment
+		case isSpace(r):
+			l.ignore()
+		case isAlphaNumeric(r) || r == '_':
+			return lexElement
+		case isNumber(r):
+			return lexNumber
+		case r == '@':
+			l.ignore()
+			return lexLabel
+		case r == '"':
+			return lexInsideString
+		default:
+			return nil
+		}
+	}
+}
+
+// lexComment parses the current position until the end
+// of the line and discards the text.
+func lexComment(l *lexer) stateFn {
+	l.acceptRunUntil('\n')
+	l.ignore()
+
+	return lexLine
+}
+
+// lexLabel parses the current label, emits and returns
+// the lex text state function to advance the parsing
+// process.
+func lexLabel(l *lexer) stateFn {
+	l.acceptRun(Alpha + "_")
+
+	l.emit(label)
+
+	return lexLine
+}
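accept and acceptRun above are the workhorses of the scanner: accept consumes a single rune if it belongs to the valid set, while acceptRun keeps consuming runes until one falls outside the set. The standalone sketch below mirrors the idea behind lexNumber, which combines them to scan decimal and 0x-prefixed hexadecimal literals (scanNumber and its constants are hypothetical names, not part of core/asm):

package main

import (
	"fmt"
	"strings"
)

const (
	numbers    = "1234567890"
	hexNumbers = numbers + "aAbBcCdDeEfF"
)

// scanNumber accepts an optional 0x/0X prefix and then a run of digits drawn
// from the matching character set, returning the scanned literal.
func scanNumber(s string) string {
	pos := 0
	valid := numbers
	if strings.HasPrefix(s, "0x") || strings.HasPrefix(s, "0X") {
		pos = 2
		valid = hexNumbers
	}
	// The acceptRun idea: advance while the current rune is in the valid set.
	for pos < len(s) && strings.IndexRune(valid, rune(s[pos])) >= 0 {
		pos++
	}
	return s[:pos]
}

func main() {
	fmt.Println(scanNumber("0x1f JUMP")) // "0x1f"
	fmt.Println(scanNumber("42 PUSH1"))  // "42"
}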
+
+// lexInsideString lexes the inside of a string
+// until the state function finds the closing quote.
+// It returns the lex text state function.
+func lexInsideString(l *lexer) stateFn {
+	if l.acceptRunUntil('"') {
+		l.emit(stringValue)
+	}
+
+	return lexLine
+}
+
+func lexNumber(l *lexer) stateFn {
+	acceptance := Numbers
+	if l.accept("0") && l.accept("xX") {
+		acceptance = HexadecimalNumbers
+	}
+	l.acceptRun(acceptance)
+
+	l.emit(number)
+
+	return lexLine
+}
+
+func lexElement(l *lexer) stateFn {
+	l.acceptRun(Alpha + "_" + Numbers)
+
+	if l.peek() == ':' {
+		l.emit(labelDef)
+
+		l.accept(":")
+		l.ignore()
+	} else {
+		l.emit(element)
+	}
+	return lexLine
+}
+
+func isAlphaNumeric(t rune) bool {
+	return unicode.IsLetter(t)
+}
+
+func isSpace(t rune) bool {
+	return unicode.IsSpace(t)
+}
+
+func isNumber(t rune) bool {
+	return unicode.IsNumber(t)
+}
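For reference, here is a sketch of how the Lex entry point might be consumed. Because token and its fields are unexported, code like this would have to live inside package asm (for example in a test file), and the sample assembly source is made up for illustration:

package asm

import "fmt"

// lexDemo is an illustrative consumer of Lex (a hypothetical helper, not part
// of the diff). It prints every token the lexer emits for a small program.
func lexDemo() {
	src := []byte("@start\n\tPUSH 0x1\n\tJUMP @start ;; loop forever\n")
	for tok := range Lex("example.asm", src, false) {
		fmt.Printf("line %d: %-20v %q\n", tok.lineno, tok.typ, tok.text)
	}
}

Each received token carries its type, the line it was found on, and the raw text, which is what the compiler stage mentioned in the lexer's doc comment consumes.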