diff options
author | Taylor Gerring <taylor.gerring@gmail.com> | 2015-02-16 21:28:33 +0800 |
---|---|---|
committer | Taylor Gerring <taylor.gerring@gmail.com> | 2015-02-16 21:28:33 +0800 |
commit | 702218008ee2b6d708d6b2821cdef80736bb3224 (patch) | |
tree | d55ff7ce88187082378e7d8e4c2f3aad14d23b4e /Godeps/_workspace/src/github.com/robertkrimen/otto/parser | |
parent | 202362d9258335c695eb75f55f4be74a50a1af33 (diff) | |
download | go-tangerine-702218008ee2b6d708d6b2821cdef80736bb3224.tar.gz go-tangerine-702218008ee2b6d708d6b2821cdef80736bb3224.tar.zst go-tangerine-702218008ee2b6d708d6b2821cdef80736bb3224.zip |
Add versioned dependencies from godep
Diffstat (limited to 'Godeps/_workspace/src/github.com/robertkrimen/otto/parser')
14 files changed, 5813 insertions, 0 deletions
diff --git a/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/Makefile b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/Makefile new file mode 100644 index 000000000..766fd4d0b --- /dev/null +++ b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/Makefile @@ -0,0 +1,4 @@ +.PHONY: test + +test: + go test diff --git a/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/README.markdown b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/README.markdown new file mode 100644 index 000000000..c3cae5b60 --- /dev/null +++ b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/README.markdown @@ -0,0 +1,190 @@ +# parser +-- + import "github.com/robertkrimen/otto/parser" + +Package parser implements a parser for JavaScript. + + import ( + "github.com/robertkrimen/otto/parser" + ) + +Parse and return an AST + + filename := "" // A filename is optional + src := ` + // Sample xyzzy example + (function(){ + if (3.14159 > 0) { + console.log("Hello, World."); + return; + } + + var xyzzy = NaN; + console.log("Nothing happens."); + return xyzzy; + })(); + ` + + // Parse some JavaScript, yielding a *ast.Program and/or an ErrorList + program, err := parser.ParseFile(nil, filename, src, 0) + + +### Warning + +The parser and AST interfaces are still works-in-progress (particularly where +node types are concerned) and may change in the future. + +## Usage + +#### func ParseFile + +```go +func ParseFile(fileSet *file.FileSet, filename string, src interface{}, mode Mode) (*ast.Program, error) +``` +ParseFile parses the source code of a single JavaScript/ECMAScript source file +and returns the corresponding ast.Program node. + +If fileSet == nil, ParseFile parses source without a FileSet. If fileSet != nil, +ParseFile first adds filename and src to fileSet. + +The filename argument is optional and is used for labelling errors, etc. + +src may be a string, a byte slice, a bytes.Buffer, or an io.Reader, but it MUST +always be in UTF-8. 
+ + // Parse some JavaScript, yielding a *ast.Program and/or an ErrorList + program, err := parser.ParseFile(nil, "", `if (abc > 1) {}`, 0) + +#### func ParseFunction + +```go +func ParseFunction(parameterList, body string) (*ast.FunctionLiteral, error) +``` +ParseFunction parses a given parameter list and body as a function and returns +the corresponding ast.FunctionLiteral node. + +The parameter list, if any, should be a comma-separated list of identifiers. + +#### func ReadSource + +```go +func ReadSource(filename string, src interface{}) ([]byte, error) +``` + +#### func TransformRegExp + +```go +func TransformRegExp(pattern string) (string, error) +``` +TransformRegExp transforms a JavaScript pattern into a Go "regexp" pattern. + +re2 (Go) cannot do backtracking, so the presence of a lookahead (?=) (?!) or +backreference (\1, \2, ...) will cause an error. + +re2 (Go) has a different definition for \s: [\t\n\f\r ]. The JavaScript +definition, on the other hand, also includes \v, Unicode "Separator, Space", +etc. + +If the pattern is invalid (not valid even in JavaScript), then this function +returns the empty string and an error. + +If the pattern is valid, but incompatible (contains a lookahead or +backreference), then this function returns the transformation (a non-empty +string) AND an error. + +#### type Error + +```go +type Error struct { + Position file.Position + Message string +} +``` + +An Error represents a parsing error. It includes the position where the error +occurred and a message/description. + +#### func (Error) Error + +```go +func (self Error) Error() string +``` + +#### type ErrorList + +```go +type ErrorList []*Error +``` + +ErrorList is a list of *Errors. + +#### func (*ErrorList) Add + +```go +func (self *ErrorList) Add(position file.Position, msg string) +``` +Add adds an Error with given position and message to an ErrorList. 
+ +#### func (ErrorList) Err + +```go +func (self ErrorList) Err() error +``` +Err returns an error equivalent to this ErrorList. If the list is empty, Err +returns nil. + +#### func (ErrorList) Error + +```go +func (self ErrorList) Error() string +``` +Error implements the Error interface. + +#### func (ErrorList) Len + +```go +func (self ErrorList) Len() int +``` + +#### func (ErrorList) Less + +```go +func (self ErrorList) Less(i, j int) bool +``` + +#### func (*ErrorList) Reset + +```go +func (self *ErrorList) Reset() +``` +Reset resets an ErrorList to no errors. + +#### func (ErrorList) Sort + +```go +func (self ErrorList) Sort() +``` + +#### func (ErrorList) Swap + +```go +func (self ErrorList) Swap(i, j int) +``` + +#### type Mode + +```go +type Mode uint +``` + +A Mode value is a set of flags (or 0). They control optional parser +functionality. + +```go +const ( + IgnoreRegExpErrors Mode = 1 << iota // Ignore RegExp compatibility errors (allow backtracking) +) +``` + +-- +**godocdown** http://github.com/robertkrimen/godocdown diff --git a/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/dbg.go b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/dbg.go new file mode 100644 index 000000000..3c5f2f698 --- /dev/null +++ b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/dbg.go @@ -0,0 +1,9 @@ +// This file was AUTOMATICALLY GENERATED by dbg-import (smuggol) for github.com/robertkrimen/dbg + +package parser + +import ( + Dbg "github.com/robertkrimen/otto/dbg" +) + +var dbg, dbgf = Dbg.New() diff --git a/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/error.go b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/error.go new file mode 100644 index 000000000..e0f74a5cf --- /dev/null +++ b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/error.go @@ -0,0 +1,175 @@ +package parser + +import ( + "fmt" + "sort" + + "github.com/robertkrimen/otto/file" + "github.com/robertkrimen/otto/token" +) + +const ( + 
err_UnexpectedToken = "Unexpected token %v" + err_UnexpectedEndOfInput = "Unexpected end of input" + err_UnexpectedEscape = "Unexpected escape" +) + +// UnexpectedNumber: 'Unexpected number', +// UnexpectedString: 'Unexpected string', +// UnexpectedIdentifier: 'Unexpected identifier', +// UnexpectedReserved: 'Unexpected reserved word', +// NewlineAfterThrow: 'Illegal newline after throw', +// InvalidRegExp: 'Invalid regular expression', +// UnterminatedRegExp: 'Invalid regular expression: missing /', +// InvalidLHSInAssignment: 'Invalid left-hand side in assignment', +// InvalidLHSInForIn: 'Invalid left-hand side in for-in', +// MultipleDefaultsInSwitch: 'More than one default clause in switch statement', +// NoCatchOrFinally: 'Missing catch or finally after try', +// UnknownLabel: 'Undefined label \'%0\'', +// Redeclaration: '%0 \'%1\' has already been declared', +// IllegalContinue: 'Illegal continue statement', +// IllegalBreak: 'Illegal break statement', +// IllegalReturn: 'Illegal return statement', +// StrictModeWith: 'Strict mode code may not include a with statement', +// StrictCatchVariable: 'Catch variable may not be eval or arguments in strict mode', +// StrictVarName: 'Variable name may not be eval or arguments in strict mode', +// StrictParamName: 'Parameter name eval or arguments is not allowed in strict mode', +// StrictParamDupe: 'Strict mode function may not have duplicate parameter names', +// StrictFunctionName: 'Function name may not be eval or arguments in strict mode', +// StrictOctalLiteral: 'Octal literals are not allowed in strict mode.', +// StrictDelete: 'Delete of an unqualified identifier in strict mode.', +// StrictDuplicateProperty: 'Duplicate data property in object literal not allowed in strict mode', +// AccessorDataProperty: 'Object literal may not have data and accessor property with the same name', +// AccessorGetSet: 'Object literal may not have multiple get/set accessors with the same name', +// StrictLHSAssignment: 
'Assignment to eval or arguments is not allowed in strict mode', +// StrictLHSPostfix: 'Postfix increment/decrement may not have eval or arguments operand in strict mode', +// StrictLHSPrefix: 'Prefix increment/decrement may not have eval or arguments operand in strict mode', +// StrictReservedWord: 'Use of future reserved word in strict mode' + +// A SyntaxError is a description of an ECMAScript syntax error. + +// An Error represents a parsing error. It includes the position where the error occurred and a message/description. +type Error struct { + Position file.Position + Message string +} + +// FIXME Should this be "SyntaxError"? + +func (self Error) Error() string { + filename := self.Position.Filename + if filename == "" { + filename = "(anonymous)" + } + return fmt.Sprintf("%s: Line %d:%d %s", + filename, + self.Position.Line, + self.Position.Column, + self.Message, + ) +} + +func (self *_parser) error(place interface{}, msg string, msgValues ...interface{}) *Error { + idx := file.Idx(0) + switch place := place.(type) { + case int: + idx = self.idxOf(place) + case file.Idx: + if place == 0 { + idx = self.idxOf(self.chrOffset) + } else { + idx = place + } + default: + panic(fmt.Errorf("error(%T, ...)", place)) + } + + position := self.position(idx) + msg = fmt.Sprintf(msg, msgValues...) 
+ self.errors.Add(position, msg) + return self.errors[len(self.errors)-1] +} + +func (self *_parser) errorUnexpected(idx file.Idx, chr rune) error { + if chr == -1 { + return self.error(idx, err_UnexpectedEndOfInput) + } + return self.error(idx, err_UnexpectedToken, token.ILLEGAL) +} + +func (self *_parser) errorUnexpectedToken(tkn token.Token) error { + switch tkn { + case token.EOF: + return self.error(file.Idx(0), err_UnexpectedEndOfInput) + } + value := tkn.String() + switch tkn { + case token.BOOLEAN, token.NULL: + value = self.literal + case token.IDENTIFIER: + return self.error(self.idx, "Unexpected identifier") + case token.KEYWORD: + // TODO Might be a future reserved word + return self.error(self.idx, "Unexpected reserved word") + case token.NUMBER: + return self.error(self.idx, "Unexpected number") + case token.STRING: + return self.error(self.idx, "Unexpected string") + } + return self.error(self.idx, err_UnexpectedToken, value) +} + +// ErrorList is a list of *Errors. +// +type ErrorList []*Error + +// Add adds an Error with given position and message to an ErrorList. +func (self *ErrorList) Add(position file.Position, msg string) { + *self = append(*self, &Error{position, msg}) +} + +// Reset resets an ErrorList to no errors. +func (self *ErrorList) Reset() { *self = (*self)[0:0] } + +func (self ErrorList) Len() int { return len(self) } +func (self ErrorList) Swap(i, j int) { self[i], self[j] = self[j], self[i] } +func (self ErrorList) Less(i, j int) bool { + x := &self[i].Position + y := &self[j].Position + if x.Filename < y.Filename { + return true + } + if x.Filename == y.Filename { + if x.Line < y.Line { + return true + } + if x.Line == y.Line { + return x.Column < y.Column + } + } + return false +} + +func (self ErrorList) Sort() { + sort.Sort(self) +} + +// Error implements the Error interface. 
+func (self ErrorList) Error() string { + switch len(self) { + case 0: + return "no errors" + case 1: + return self[0].Error() + } + return fmt.Sprintf("%s (and %d more errors)", self[0].Error(), len(self)-1) +} + +// Err returns an error equivalent to this ErrorList. +// If the list is empty, Err returns nil. +func (self ErrorList) Err() error { + if len(self) == 0 { + return nil + } + return self +} diff --git a/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/expression.go b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/expression.go new file mode 100644 index 000000000..dc397b5cb --- /dev/null +++ b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/expression.go @@ -0,0 +1,815 @@ +package parser + +import ( + "regexp" + + "github.com/robertkrimen/otto/ast" + "github.com/robertkrimen/otto/file" + "github.com/robertkrimen/otto/token" +) + +func (self *_parser) parseIdentifier() *ast.Identifier { + literal := self.literal + idx := self.idx + self.next() + return &ast.Identifier{ + Name: literal, + Idx: idx, + } +} + +func (self *_parser) parsePrimaryExpression() ast.Expression { + literal := self.literal + idx := self.idx + switch self.token { + case token.IDENTIFIER: + self.next() + if len(literal) > 1 { + tkn, strict := token.IsKeyword(literal) + if tkn == token.KEYWORD { + if !strict { + self.error(idx, "Unexpected reserved word") + } + } + } + return &ast.Identifier{ + Name: literal, + Idx: idx, + } + case token.NULL: + self.next() + return &ast.NullLiteral{ + Idx: idx, + Literal: literal, + } + case token.BOOLEAN: + self.next() + value := false + switch literal { + case "true": + value = true + case "false": + value = false + default: + self.error(idx, "Illegal boolean literal") + } + return &ast.BooleanLiteral{ + Idx: idx, + Literal: literal, + Value: value, + } + case token.STRING: + self.next() + value, err := parseStringLiteral(literal[1 : len(literal)-1]) + if err != nil { + self.error(idx, err.Error()) + } + return 
&ast.StringLiteral{ + Idx: idx, + Literal: literal, + Value: value, + } + case token.NUMBER: + self.next() + value, err := parseNumberLiteral(literal) + if err != nil { + self.error(idx, err.Error()) + value = 0 + } + return &ast.NumberLiteral{ + Idx: idx, + Literal: literal, + Value: value, + } + case token.SLASH, token.QUOTIENT_ASSIGN: + return self.parseRegExpLiteral() + case token.LEFT_BRACE: + return self.parseObjectLiteral() + case token.LEFT_BRACKET: + return self.parseArrayLiteral() + case token.LEFT_PARENTHESIS: + self.expect(token.LEFT_PARENTHESIS) + expression := self.parseExpression() + self.expect(token.RIGHT_PARENTHESIS) + return expression + case token.THIS: + self.next() + return &ast.ThisExpression{ + Idx: idx, + } + case token.FUNCTION: + return self.parseFunction(false) + } + + self.errorUnexpectedToken(self.token) + self.nextStatement() + return &ast.BadExpression{From: idx, To: self.idx} +} + +func (self *_parser) parseRegExpLiteral() *ast.RegExpLiteral { + + offset := self.chrOffset - 1 // Opening slash already gotten + if self.token == token.QUOTIENT_ASSIGN { + offset -= 1 // = + } + idx := self.idxOf(offset) + + pattern, err := self.scanString(offset) + endOffset := self.chrOffset + + self.next() + if err == nil { + pattern = pattern[1 : len(pattern)-1] + } + + flags := "" + if self.token == token.IDENTIFIER { // gim + + flags = self.literal + self.next() + endOffset = self.chrOffset - 1 + } + + var value string + // TODO 15.10 + { + // Test during parsing that this is a valid regular expression + // Sorry, (?=) and (?!) 
are invalid (for now) + pattern, err := TransformRegExp(pattern) + if err != nil { + if pattern == "" || self.mode&IgnoreRegExpErrors == 0 { + self.error(idx, "Invalid regular expression: %s", err.Error()) + } + } else { + _, err = regexp.Compile(pattern) + if err != nil { + // We should not get here, ParseRegExp should catch any errors + self.error(idx, "Invalid regular expression: %s", err.Error()[22:]) // Skip redundant "parse regexp error" + } else { + value = pattern + } + } + } + + literal := self.str[offset:endOffset] + + return &ast.RegExpLiteral{ + Idx: idx, + Literal: literal, + Pattern: pattern, + Flags: flags, + Value: value, + } +} + +func (self *_parser) parseVariableDeclaration(declarationList *[]*ast.VariableExpression) ast.Expression { + + if self.token != token.IDENTIFIER { + idx := self.expect(token.IDENTIFIER) + self.nextStatement() + return &ast.BadExpression{From: idx, To: self.idx} + } + + literal := self.literal + idx := self.idx + self.next() + node := &ast.VariableExpression{ + Name: literal, + Idx: idx, + } + + if declarationList != nil { + *declarationList = append(*declarationList, node) + } + + if self.token == token.ASSIGN { + self.next() + node.Initializer = self.parseAssignmentExpression() + } + + return node +} + +func (self *_parser) parseVariableDeclarationList(var_ file.Idx) []ast.Expression { + + var declarationList []*ast.VariableExpression // Avoid bad expressions + var list []ast.Expression + + for { + list = append(list, self.parseVariableDeclaration(&declarationList)) + if self.token != token.COMMA { + break + } + self.next() + } + + self.scope.declare(&ast.VariableDeclaration{ + Var: var_, + List: declarationList, + }) + + return list +} + +func (self *_parser) parseObjectPropertyKey() (string, string) { + idx, tkn, literal := self.idx, self.token, self.literal + value := "" + self.next() + switch tkn { + case token.IDENTIFIER: + value = literal + case token.NUMBER: + var err error + _, err = parseNumberLiteral(literal) + 
if err != nil { + self.error(idx, err.Error()) + } else { + value = literal + } + case token.STRING: + var err error + value, err = parseStringLiteral(literal[1 : len(literal)-1]) + if err != nil { + self.error(idx, err.Error()) + } + default: + // null, false, class, etc. + if matchIdentifier.MatchString(literal) { + value = literal + } + } + return literal, value +} + +func (self *_parser) parseObjectProperty() ast.Property { + + literal, value := self.parseObjectPropertyKey() + if literal == "get" && self.token != token.COLON { + idx := self.idx + _, value := self.parseObjectPropertyKey() + parameterList := self.parseFunctionParameterList() + + node := &ast.FunctionLiteral{ + Function: idx, + ParameterList: parameterList, + } + self.parseFunctionBlock(node) + return ast.Property{ + Key: value, + Kind: "get", + Value: node, + } + } else if literal == "set" && self.token != token.COLON { + idx := self.idx + _, value := self.parseObjectPropertyKey() + parameterList := self.parseFunctionParameterList() + + node := &ast.FunctionLiteral{ + Function: idx, + ParameterList: parameterList, + } + self.parseFunctionBlock(node) + return ast.Property{ + Key: value, + Kind: "set", + Value: node, + } + } + + self.expect(token.COLON) + + return ast.Property{ + Key: value, + Kind: "value", + Value: self.parseAssignmentExpression(), + } +} + +func (self *_parser) parseObjectLiteral() ast.Expression { + var value []ast.Property + idx0 := self.expect(token.LEFT_BRACE) + for self.token != token.RIGHT_BRACE && self.token != token.EOF { + property := self.parseObjectProperty() + value = append(value, property) + if self.token == token.COMMA { + self.next() + continue + } + } + idx1 := self.expect(token.RIGHT_BRACE) + + return &ast.ObjectLiteral{ + LeftBrace: idx0, + RightBrace: idx1, + Value: value, + } +} + +func (self *_parser) parseArrayLiteral() ast.Expression { + + idx0 := self.expect(token.LEFT_BRACKET) + var value []ast.Expression + for self.token != token.RIGHT_BRACKET && 
self.token != token.EOF { + if self.token == token.COMMA { + self.next() + value = append(value, nil) + continue + } + value = append(value, self.parseAssignmentExpression()) + if self.token != token.RIGHT_BRACKET { + self.expect(token.COMMA) + } + } + idx1 := self.expect(token.RIGHT_BRACKET) + + return &ast.ArrayLiteral{ + LeftBracket: idx0, + RightBracket: idx1, + Value: value, + } +} + +func (self *_parser) parseArgumentList() (argumentList []ast.Expression, idx0, idx1 file.Idx) { + idx0 = self.expect(token.LEFT_PARENTHESIS) + if self.token != token.RIGHT_PARENTHESIS { + for { + argumentList = append(argumentList, self.parseAssignmentExpression()) + if self.token != token.COMMA { + break + } + self.next() + } + } + idx1 = self.expect(token.RIGHT_PARENTHESIS) + return +} + +func (self *_parser) parseCallExpression(left ast.Expression) ast.Expression { + argumentList, idx0, idx1 := self.parseArgumentList() + return &ast.CallExpression{ + Callee: left, + LeftParenthesis: idx0, + ArgumentList: argumentList, + RightParenthesis: idx1, + } +} + +func (self *_parser) parseDotMember(left ast.Expression) ast.Expression { + period := self.expect(token.PERIOD) + + literal := self.literal + idx := self.idx + + if !matchIdentifier.MatchString(literal) { + self.expect(token.IDENTIFIER) + self.nextStatement() + return &ast.BadExpression{From: period, To: self.idx} + } + + self.next() + + return &ast.DotExpression{ + Left: left, + Identifier: ast.Identifier{ + Idx: idx, + Name: literal, + }, + } +} + +func (self *_parser) parseBracketMember(left ast.Expression) ast.Expression { + idx0 := self.expect(token.LEFT_BRACKET) + member := self.parseExpression() + idx1 := self.expect(token.RIGHT_BRACKET) + return &ast.BracketExpression{ + LeftBracket: idx0, + Left: left, + Member: member, + RightBracket: idx1, + } +} + +func (self *_parser) parseNewExpression() ast.Expression { + idx := self.expect(token.NEW) + callee := self.parseLeftHandSideExpression() + node := &ast.NewExpression{ + 
New: idx, + Callee: callee, + } + if self.token == token.LEFT_PARENTHESIS { + argumentList, idx0, idx1 := self.parseArgumentList() + node.ArgumentList = argumentList + node.LeftParenthesis = idx0 + node.RightParenthesis = idx1 + } + return node +} + +func (self *_parser) parseLeftHandSideExpression() ast.Expression { + + var left ast.Expression + if self.token == token.NEW { + left = self.parseNewExpression() + } else { + left = self.parsePrimaryExpression() + } + + for { + if self.token == token.PERIOD { + left = self.parseDotMember(left) + } else if self.token == token.LEFT_BRACE { + left = self.parseBracketMember(left) + } else { + break + } + } + + return left +} + +func (self *_parser) parseLeftHandSideExpressionAllowCall() ast.Expression { + + allowIn := self.scope.allowIn + self.scope.allowIn = true + defer func() { + self.scope.allowIn = allowIn + }() + + var left ast.Expression + if self.token == token.NEW { + left = self.parseNewExpression() + } else { + left = self.parsePrimaryExpression() + } + + for { + if self.token == token.PERIOD { + left = self.parseDotMember(left) + } else if self.token == token.LEFT_BRACKET { + left = self.parseBracketMember(left) + } else if self.token == token.LEFT_PARENTHESIS { + left = self.parseCallExpression(left) + } else { + break + } + } + + return left +} + +func (self *_parser) parsePostfixExpression() ast.Expression { + operand := self.parseLeftHandSideExpressionAllowCall() + + switch self.token { + case token.INCREMENT, token.DECREMENT: + // Make sure there is no line terminator here + if self.implicitSemicolon { + break + } + tkn := self.token + idx := self.idx + self.next() + switch operand.(type) { + case *ast.Identifier, *ast.DotExpression, *ast.BracketExpression: + default: + self.error(idx, "Invalid left-hand side in assignment") + self.nextStatement() + return &ast.BadExpression{From: idx, To: self.idx} + } + return &ast.UnaryExpression{ + Operator: tkn, + Idx: idx, + Operand: operand, + Postfix: true, + } + } 
+ + return operand +} + +func (self *_parser) parseUnaryExpression() ast.Expression { + + switch self.token { + case token.PLUS, token.MINUS, token.NOT, token.BITWISE_NOT: + fallthrough + case token.DELETE, token.VOID, token.TYPEOF: + tkn := self.token + idx := self.idx + self.next() + return &ast.UnaryExpression{ + Operator: tkn, + Idx: idx, + Operand: self.parseUnaryExpression(), + } + case token.INCREMENT, token.DECREMENT: + tkn := self.token + idx := self.idx + self.next() + operand := self.parseUnaryExpression() + switch operand.(type) { + case *ast.Identifier, *ast.DotExpression, *ast.BracketExpression: + default: + self.error(idx, "Invalid left-hand side in assignment") + self.nextStatement() + return &ast.BadExpression{From: idx, To: self.idx} + } + return &ast.UnaryExpression{ + Operator: tkn, + Idx: idx, + Operand: operand, + } + } + + return self.parsePostfixExpression() +} + +func (self *_parser) parseMultiplicativeExpression() ast.Expression { + next := self.parseUnaryExpression + left := next() + + for self.token == token.MULTIPLY || self.token == token.SLASH || + self.token == token.REMAINDER { + tkn := self.token + self.next() + left = &ast.BinaryExpression{ + Operator: tkn, + Left: left, + Right: next(), + } + } + + return left +} + +func (self *_parser) parseAdditiveExpression() ast.Expression { + next := self.parseMultiplicativeExpression + left := next() + + for self.token == token.PLUS || self.token == token.MINUS { + tkn := self.token + self.next() + left = &ast.BinaryExpression{ + Operator: tkn, + Left: left, + Right: next(), + } + } + + return left +} + +func (self *_parser) parseShiftExpression() ast.Expression { + next := self.parseAdditiveExpression + left := next() + + for self.token == token.SHIFT_LEFT || self.token == token.SHIFT_RIGHT || + self.token == token.UNSIGNED_SHIFT_RIGHT { + tkn := self.token + self.next() + left = &ast.BinaryExpression{ + Operator: tkn, + Left: left, + Right: next(), + } + } + + return left +} + +func (self 
*_parser) parseRelationalExpression() ast.Expression { + next := self.parseShiftExpression + left := next() + + allowIn := self.scope.allowIn + self.scope.allowIn = true + defer func() { + self.scope.allowIn = allowIn + }() + + switch self.token { + case token.LESS, token.LESS_OR_EQUAL, token.GREATER, token.GREATER_OR_EQUAL: + tkn := self.token + self.next() + return &ast.BinaryExpression{ + Operator: tkn, + Left: left, + Right: self.parseRelationalExpression(), + Comparison: true, + } + case token.INSTANCEOF: + tkn := self.token + self.next() + return &ast.BinaryExpression{ + Operator: tkn, + Left: left, + Right: self.parseRelationalExpression(), + } + case token.IN: + if !allowIn { + return left + } + tkn := self.token + self.next() + return &ast.BinaryExpression{ + Operator: tkn, + Left: left, + Right: self.parseRelationalExpression(), + } + } + + return left +} + +func (self *_parser) parseEqualityExpression() ast.Expression { + next := self.parseRelationalExpression + left := next() + + for self.token == token.EQUAL || self.token == token.NOT_EQUAL || + self.token == token.STRICT_EQUAL || self.token == token.STRICT_NOT_EQUAL { + tkn := self.token + self.next() + left = &ast.BinaryExpression{ + Operator: tkn, + Left: left, + Right: next(), + Comparison: true, + } + } + + return left +} + +func (self *_parser) parseBitwiseAndExpression() ast.Expression { + next := self.parseEqualityExpression + left := next() + + for self.token == token.AND { + tkn := self.token + self.next() + left = &ast.BinaryExpression{ + Operator: tkn, + Left: left, + Right: next(), + } + } + + return left +} + +func (self *_parser) parseBitwiseExclusiveOrExpression() ast.Expression { + next := self.parseBitwiseAndExpression + left := next() + + for self.token == token.EXCLUSIVE_OR { + tkn := self.token + self.next() + left = &ast.BinaryExpression{ + Operator: tkn, + Left: left, + Right: next(), + } + } + + return left +} + +func (self *_parser) parseBitwiseOrExpression() ast.Expression { + 
next := self.parseBitwiseExclusiveOrExpression + left := next() + + for self.token == token.OR { + tkn := self.token + self.next() + left = &ast.BinaryExpression{ + Operator: tkn, + Left: left, + Right: next(), + } + } + + return left +} + +func (self *_parser) parseLogicalAndExpression() ast.Expression { + next := self.parseBitwiseOrExpression + left := next() + + for self.token == token.LOGICAL_AND { + tkn := self.token + self.next() + left = &ast.BinaryExpression{ + Operator: tkn, + Left: left, + Right: next(), + } + } + + return left +} + +func (self *_parser) parseLogicalOrExpression() ast.Expression { + next := self.parseLogicalAndExpression + left := next() + + for self.token == token.LOGICAL_OR { + tkn := self.token + self.next() + left = &ast.BinaryExpression{ + Operator: tkn, + Left: left, + Right: next(), + } + } + + return left +} + +func (self *_parser) parseConditionlExpression() ast.Expression { + left := self.parseLogicalOrExpression() + + if self.token == token.QUESTION_MARK { + self.next() + consequent := self.parseAssignmentExpression() + self.expect(token.COLON) + return &ast.ConditionalExpression{ + Test: left, + Consequent: consequent, + Alternate: self.parseAssignmentExpression(), + } + } + + return left +} + +func (self *_parser) parseAssignmentExpression() ast.Expression { + left := self.parseConditionlExpression() + var operator token.Token + switch self.token { + case token.ASSIGN: + operator = self.token + case token.ADD_ASSIGN: + operator = token.PLUS + case token.SUBTRACT_ASSIGN: + operator = token.MINUS + case token.MULTIPLY_ASSIGN: + operator = token.MULTIPLY + case token.QUOTIENT_ASSIGN: + operator = token.SLASH + case token.REMAINDER_ASSIGN: + operator = token.REMAINDER + case token.AND_ASSIGN: + operator = token.AND + case token.AND_NOT_ASSIGN: + operator = token.AND_NOT + case token.OR_ASSIGN: + operator = token.OR + case token.EXCLUSIVE_OR_ASSIGN: + operator = token.EXCLUSIVE_OR + case token.SHIFT_LEFT_ASSIGN: + operator = 
token.SHIFT_LEFT + case token.SHIFT_RIGHT_ASSIGN: + operator = token.SHIFT_RIGHT + case token.UNSIGNED_SHIFT_RIGHT_ASSIGN: + operator = token.UNSIGNED_SHIFT_RIGHT + } + + if operator != 0 { + idx := self.idx + self.next() + switch left.(type) { + case *ast.Identifier, *ast.DotExpression, *ast.BracketExpression: + default: + self.error(left.Idx0(), "Invalid left-hand side in assignment") + self.nextStatement() + return &ast.BadExpression{From: idx, To: self.idx} + } + return &ast.AssignExpression{ + Left: left, + Operator: operator, + Right: self.parseAssignmentExpression(), + } + } + + return left +} + +func (self *_parser) parseExpression() ast.Expression { + next := self.parseAssignmentExpression + left := next() + + if self.token == token.COMMA { + sequence := []ast.Expression{left} + for { + if self.token != token.COMMA { + break + } + self.next() + sequence = append(sequence, next()) + } + return &ast.SequenceExpression{ + Sequence: sequence, + } + } + + return left +} diff --git a/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/lexer.go b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/lexer.go new file mode 100644 index 000000000..bc3e74f77 --- /dev/null +++ b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/lexer.go @@ -0,0 +1,819 @@ +package parser + +import ( + "bytes" + "errors" + "fmt" + "regexp" + "strconv" + "strings" + "unicode" + "unicode/utf8" + + "github.com/robertkrimen/otto/file" + "github.com/robertkrimen/otto/token" +) + +type _chr struct { + value rune + width int +} + +var matchIdentifier = regexp.MustCompile(`^[$_\p{L}][$_\p{L}\d}]*$`) + +func isDecimalDigit(chr rune) bool { + return '0' <= chr && chr <= '9' +} + +func digitValue(chr rune) int { + switch { + case '0' <= chr && chr <= '9': + return int(chr - '0') + case 'a' <= chr && chr <= 'f': + return int(chr - 'a' + 10) + case 'A' <= chr && chr <= 'F': + return int(chr - 'A' + 10) + } + return 16 // Larger than any legal digit value +} + +func isDigit(chr 
rune, base int) bool { + return digitValue(chr) < base +} + +func isIdentifierStart(chr rune) bool { + return chr == '$' || chr == '_' || chr == '\\' || + 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' || + chr >= utf8.RuneSelf && unicode.IsLetter(chr) +} + +func isIdentifierPart(chr rune) bool { + return chr == '$' || chr == '_' || chr == '\\' || + 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' || + '0' <= chr && chr <= '9' || + chr >= utf8.RuneSelf && (unicode.IsLetter(chr) || unicode.IsDigit(chr)) +} + +func (self *_parser) scanIdentifier() (string, error) { + offset := self.chrOffset + parse := false + for isIdentifierPart(self.chr) { + if self.chr == '\\' { + distance := self.chrOffset - offset + self.read() + if self.chr != 'u' { + return "", fmt.Errorf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr)) + } + parse = true + var value rune + for j := 0; j < 4; j++ { + self.read() + decimal, ok := hex2decimal(byte(self.chr)) + if !ok { + return "", fmt.Errorf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr)) + } + value = value<<4 | decimal + } + if value == '\\' { + return "", fmt.Errorf("Invalid identifier escape value: %c (%s)", value, string(value)) + } else if distance == 0 { + if !isIdentifierStart(value) { + return "", fmt.Errorf("Invalid identifier escape value: %c (%s)", value, string(value)) + } + } else if distance > 0 { + if !isIdentifierPart(value) { + return "", fmt.Errorf("Invalid identifier escape value: %c (%s)", value, string(value)) + } + } + } + self.read() + } + literal := string(self.str[offset:self.chrOffset]) + if parse { + return parseStringLiteral(literal) + } + return literal, nil +} + +// 7.2 +func isLineWhiteSpace(chr rune) bool { + switch chr { + case '\u0009', '\u000b', '\u000c', '\u0020', '\u00a0', '\ufeff': + return true + case '\u000a', '\u000d', '\u2028', '\u2029': + return false + case '\u0085': + return false + } + return unicode.IsSpace(chr) +} + +// 7.3 +func 
isLineTerminator(chr rune) bool { + switch chr { + case '\u000a', '\u000d', '\u2028', '\u2029': + return true + } + return false +} + +func (self *_parser) scan() (tkn token.Token, literal string, idx file.Idx) { + + self.implicitSemicolon = false + + for { + self.skipWhiteSpace() + + idx = self.idxOf(self.chrOffset) + insertSemicolon := false + + switch chr := self.chr; { + case isIdentifierStart(chr): + var err error + literal, err = self.scanIdentifier() + if err != nil { + tkn = token.ILLEGAL + break + } + if len(literal) > 1 { + // Keywords are longer than 1 character, avoid lookup otherwise + var strict bool + tkn, strict = token.IsKeyword(literal) + + switch tkn { + + case 0: // Not a keyword + if literal == "true" || literal == "false" { + self.insertSemicolon = true + tkn = token.BOOLEAN + return + } else if literal == "null" { + self.insertSemicolon = true + tkn = token.NULL + return + } + + case token.KEYWORD: + tkn = token.KEYWORD + if strict { + // TODO If strict and in strict mode, then this is not a break + break + } + return + + case + token.THIS, + token.BREAK, + token.THROW, // A newline after a throw is not allowed, but we need to detect it + token.RETURN, + token.CONTINUE, + token.DEBUGGER: + self.insertSemicolon = true + return + + default: + return + + } + } + self.insertSemicolon = true + tkn = token.IDENTIFIER + return + case '0' <= chr && chr <= '9': + self.insertSemicolon = true + tkn, literal = self.scanNumericLiteral(false) + return + default: + self.read() + switch chr { + case -1: + if self.insertSemicolon { + self.insertSemicolon = false + self.implicitSemicolon = true + } + tkn = token.EOF + case '\r', '\n', '\u2028', '\u2029': + self.insertSemicolon = false + self.implicitSemicolon = true + continue + case ':': + tkn = token.COLON + case '.': + if digitValue(self.chr) < 10 { + insertSemicolon = true + tkn, literal = self.scanNumericLiteral(true) + } else { + tkn = token.PERIOD + } + case ',': + tkn = token.COMMA + case ';': + tkn = 
token.SEMICOLON + case '(': + tkn = token.LEFT_PARENTHESIS + case ')': + tkn = token.RIGHT_PARENTHESIS + insertSemicolon = true + case '[': + tkn = token.LEFT_BRACKET + case ']': + tkn = token.RIGHT_BRACKET + insertSemicolon = true + case '{': + tkn = token.LEFT_BRACE + case '}': + tkn = token.RIGHT_BRACE + insertSemicolon = true + case '+': + tkn = self.switch3(token.PLUS, token.ADD_ASSIGN, '+', token.INCREMENT) + if tkn == token.INCREMENT { + insertSemicolon = true + } + case '-': + tkn = self.switch3(token.MINUS, token.SUBTRACT_ASSIGN, '-', token.DECREMENT) + if tkn == token.DECREMENT { + insertSemicolon = true + } + case '*': + tkn = self.switch2(token.MULTIPLY, token.MULTIPLY_ASSIGN) + case '/': + if self.chr == '/' { + self.skipSingleLineComment() + continue + } else if self.chr == '*' { + self.skipMultiLineComment() + continue + } else { + // Could be division, could be RegExp literal + tkn = self.switch2(token.SLASH, token.QUOTIENT_ASSIGN) + insertSemicolon = true + } + case '%': + tkn = self.switch2(token.REMAINDER, token.REMAINDER_ASSIGN) + case '^': + tkn = self.switch2(token.EXCLUSIVE_OR, token.EXCLUSIVE_OR_ASSIGN) + case '<': + tkn = self.switch4(token.LESS, token.LESS_OR_EQUAL, '<', token.SHIFT_LEFT, token.SHIFT_LEFT_ASSIGN) + case '>': + tkn = self.switch6(token.GREATER, token.GREATER_OR_EQUAL, '>', token.SHIFT_RIGHT, token.SHIFT_RIGHT_ASSIGN, '>', token.UNSIGNED_SHIFT_RIGHT, token.UNSIGNED_SHIFT_RIGHT_ASSIGN) + case '=': + tkn = self.switch2(token.ASSIGN, token.EQUAL) + if tkn == token.EQUAL && self.chr == '=' { + self.read() + tkn = token.STRICT_EQUAL + } + case '!': + tkn = self.switch2(token.NOT, token.NOT_EQUAL) + if tkn == token.NOT_EQUAL && self.chr == '=' { + self.read() + tkn = token.STRICT_NOT_EQUAL + } + case '&': + if self.chr == '^' { + self.read() + tkn = self.switch2(token.AND_NOT, token.AND_NOT_ASSIGN) + } else { + tkn = self.switch3(token.AND, token.AND_ASSIGN, '&', token.LOGICAL_AND) + } + case '|': + tkn = self.switch3(token.OR, 
token.OR_ASSIGN, '|', token.LOGICAL_OR) + case '~': + tkn = token.BITWISE_NOT + case '?': + tkn = token.QUESTION_MARK + case '"', '\'': + insertSemicolon = true + tkn = token.STRING + var err error + literal, err = self.scanString(self.chrOffset - 1) + if err != nil { + tkn = token.ILLEGAL + } + default: + self.errorUnexpected(idx, chr) + tkn = token.ILLEGAL + } + } + self.insertSemicolon = insertSemicolon + return + } +} + +func (self *_parser) switch2(tkn0, tkn1 token.Token) token.Token { + if self.chr == '=' { + self.read() + return tkn1 + } + return tkn0 +} + +func (self *_parser) switch3(tkn0, tkn1 token.Token, chr2 rune, tkn2 token.Token) token.Token { + if self.chr == '=' { + self.read() + return tkn1 + } + if self.chr == chr2 { + self.read() + return tkn2 + } + return tkn0 +} + +func (self *_parser) switch4(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token) token.Token { + if self.chr == '=' { + self.read() + return tkn1 + } + if self.chr == chr2 { + self.read() + if self.chr == '=' { + self.read() + return tkn3 + } + return tkn2 + } + return tkn0 +} + +func (self *_parser) switch6(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token, chr3 rune, tkn4, tkn5 token.Token) token.Token { + if self.chr == '=' { + self.read() + return tkn1 + } + if self.chr == chr2 { + self.read() + if self.chr == '=' { + self.read() + return tkn3 + } + if self.chr == chr3 { + self.read() + if self.chr == '=' { + self.read() + return tkn5 + } + return tkn4 + } + return tkn2 + } + return tkn0 +} + +func (self *_parser) chrAt(index int) _chr { + value, width := utf8.DecodeRuneInString(self.str[index:]) + return _chr{ + value: value, + width: width, + } +} + +func (self *_parser) _peek() rune { + if self.offset+1 < self.length { + return rune(self.str[self.offset+1]) + } + return -1 +} + +func (self *_parser) read() { + if self.offset < self.length { + self.chrOffset = self.offset + chr, width := rune(self.str[self.offset]), 1 + if chr >= utf8.RuneSelf { // !ASCII + chr, 
width = utf8.DecodeRuneInString(self.str[self.offset:]) + if chr == utf8.RuneError && width == 1 { + self.error(self.chrOffset, "Invalid UTF-8 character") + } + } + self.offset += width + self.chr = chr + } else { + self.chrOffset = self.length + self.chr = -1 // EOF + } +} + +// This is here since the functions are so similar +func (self *_RegExp_parser) read() { + if self.offset < self.length { + self.chrOffset = self.offset + chr, width := rune(self.str[self.offset]), 1 + if chr >= utf8.RuneSelf { // !ASCII + chr, width = utf8.DecodeRuneInString(self.str[self.offset:]) + if chr == utf8.RuneError && width == 1 { + self.error(self.chrOffset, "Invalid UTF-8 character") + } + } + self.offset += width + self.chr = chr + } else { + self.chrOffset = self.length + self.chr = -1 // EOF + } +} + +func (self *_parser) skipSingleLineComment() { + for self.chr != -1 { + self.read() + if isLineTerminator(self.chr) { + return + } + } +} + +func (self *_parser) skipMultiLineComment() { + self.read() + for self.chr >= 0 { + chr := self.chr + self.read() + if chr == '*' && self.chr == '/' { + self.read() + return + } + } + + self.errorUnexpected(0, self.chr) +} + +func (self *_parser) skipWhiteSpace() { + for { + switch self.chr { + case ' ', '\t', '\f', '\v', '\u00a0', '\ufeff': + self.read() + continue + case '\r': + if self._peek() == '\n' { + self.read() + } + fallthrough + case '\u2028', '\u2029', '\n': + if self.insertSemicolon { + return + } + self.read() + continue + } + if self.chr >= utf8.RuneSelf { + if unicode.IsSpace(self.chr) { + self.read() + continue + } + } + break + } +} + +func (self *_parser) skipLineWhiteSpace() { + for isLineWhiteSpace(self.chr) { + self.read() + } +} + +func (self *_parser) scanMantissa(base int) { + for digitValue(self.chr) < base { + self.read() + } +} + +func (self *_parser) scanEscape(quote rune) { + + var length, base uint32 + switch self.chr { + //case '0', '1', '2', '3', '4', '5', '6', '7': + // Octal: + // length, base, limit = 3, 
8, 255 + case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"', '\'', '0': + self.read() + return + case '\r', '\n', '\u2028', '\u2029': + self.scanNewline() + return + case 'x': + self.read() + length, base = 2, 16 + case 'u': + self.read() + length, base = 4, 16 + default: + self.read() // Always make progress + return + } + + var value uint32 + for ; length > 0 && self.chr != quote && self.chr >= 0; length-- { + digit := uint32(digitValue(self.chr)) + if digit >= base { + break + } + value = value*base + digit + self.read() + } +} + +func (self *_parser) scanString(offset int) (string, error) { + // " ' / + quote := rune(self.str[offset]) + + for self.chr != quote { + chr := self.chr + if chr == '\n' || chr == '\r' || chr == '\u2028' || chr == '\u2029' || chr < 0 { + goto newline + } + self.read() + if chr == '\\' { + if quote == '/' { + if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 { + goto newline + } + self.read() + } else { + self.scanEscape(quote) + } + } else if chr == '[' && quote == '/' { + // Allow a slash (/) in a bracket character class ([...]) + // TODO Fix this, this is hacky... 
+ quote = -1 + } else if chr == ']' && quote == -1 { + quote = '/' + } + } + + // " ' / + self.read() + + return string(self.str[offset:self.chrOffset]), nil + +newline: + self.scanNewline() + err := "String not terminated" + if quote == '/' { + err = "Invalid regular expression: missing /" + self.error(self.idxOf(offset), err) + } + return "", errors.New(err) +} + +func (self *_parser) scanNewline() { + if self.chr == '\r' { + self.read() + if self.chr != '\n' { + return + } + } + self.read() +} + +func hex2decimal(chr byte) (value rune, ok bool) { + { + chr := rune(chr) + switch { + case '0' <= chr && chr <= '9': + return chr - '0', true + case 'a' <= chr && chr <= 'f': + return chr - 'a' + 10, true + case 'A' <= chr && chr <= 'F': + return chr - 'A' + 10, true + } + return + } +} + +func parseNumberLiteral(literal string) (value interface{}, err error) { + // TODO Is Uint okay? What about -MAX_UINT + value, err = strconv.ParseInt(literal, 0, 64) + if err == nil { + return + } + + parseIntErr := err // Save this first error, just in case + + value, err = strconv.ParseFloat(literal, 64) + if err == nil { + return + } else if err.(*strconv.NumError).Err == strconv.ErrRange { + // Infinity, etc. + return value, nil + } + + err = parseIntErr + + if err.(*strconv.NumError).Err == strconv.ErrRange { + if len(literal) > 2 && literal[0] == '0' && (literal[1] == 'X' || literal[1] == 'x') { + // Could just be a very large number (e.g. 0x8000000000000000) + var value float64 + literal = literal[2:] + for _, chr := range literal { + digit := digitValue(chr) + if digit >= 16 { + goto error + } + value = value*16 + float64(digit) + } + return value, nil + } + } + +error: + return nil, errors.New("Illegal numeric literal") +} + +func parseStringLiteral(literal string) (string, error) { + // Best case scenario... + if literal == "" { + return "", nil + } + + // Slightly less-best case scenario... 
+ if !strings.ContainsRune(literal, '\\') { + return literal, nil + } + + str := literal + buffer := bytes.NewBuffer(make([]byte, 0, 3*len(literal)/2)) + + for len(str) > 0 { + switch chr := str[0]; { + // We do not explicitly handle the case of the quote + // value, which can be: " ' / + // This assumes we're already passed a partially well-formed literal + case chr >= utf8.RuneSelf: + chr, size := utf8.DecodeRuneInString(str) + buffer.WriteRune(chr) + str = str[size:] + continue + case chr != '\\': + buffer.WriteByte(chr) + str = str[1:] + continue + } + + if len(str) <= 1 { + panic("len(str) <= 1") + } + chr := str[1] + var value rune + if chr >= utf8.RuneSelf { + str = str[1:] + var size int + value, size = utf8.DecodeRuneInString(str) + str = str[size:] // \ + <character> + } else { + str = str[2:] // \<character> + switch chr { + case 'b': + value = '\b' + case 'f': + value = '\f' + case 'n': + value = '\n' + case 'r': + value = '\r' + case 't': + value = '\t' + case 'v': + value = '\v' + case 'x', 'u': + size := 0 + switch chr { + case 'x': + size = 2 + case 'u': + size = 4 + } + if len(str) < size { + return "", fmt.Errorf("invalid escape: \\%s: len(%q) != %d", string(chr), str, size) + } + for j := 0; j < size; j++ { + decimal, ok := hex2decimal(str[j]) + if !ok { + return "", fmt.Errorf("invalid escape: \\%s: %q", string(chr), str[:size]) + } + value = value<<4 | decimal + } + str = str[size:] + if chr == 'x' { + break + } + if value > utf8.MaxRune { + panic("value > utf8.MaxRune") + } + case '0': + if len(str) == 0 || '0' > str[0] || str[0] > '7' { + value = 0 + break + } + fallthrough + case '1', '2', '3', '4', '5', '6', '7': + // TODO strict + value = rune(chr) - '0' + j := 0 + for ; j < 2; j++ { + if len(str) < j+1 { + break + } + chr := str[j] + if '0' > chr || chr > '7' { + break + } + decimal := rune(str[j]) - '0' + value = (value << 3) | decimal + } + str = str[j:] + case '\\': + value = '\\' + case '\'', '"': + value = rune(chr) + case '\r': + if 
len(str) > 0 { + if str[0] == '\n' { + str = str[1:] + } + } + fallthrough + case '\n': + continue + default: + value = rune(chr) + } + } + buffer.WriteRune(value) + } + + return buffer.String(), nil +} + +func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string) { + + offset := self.chrOffset + tkn := token.NUMBER + + if decimalPoint { + offset-- + self.scanMantissa(10) + goto exponent + } + + if self.chr == '0' { + offset := self.chrOffset + self.read() + if self.chr == 'x' || self.chr == 'X' { + // Hexadecimal + self.read() + if isDigit(self.chr, 16) { + self.read() + } else { + return token.ILLEGAL, self.str[offset:self.chrOffset] + } + self.scanMantissa(16) + + if self.chrOffset-offset <= 2 { + // Only "0x" or "0X" + self.error(0, "Illegal hexadecimal number") + } + + goto hexadecimal + } else if self.chr == '.' { + // Float + goto float + } else { + // Octal, Float + if self.chr == 'e' || self.chr == 'E' { + goto exponent + } + self.scanMantissa(8) + if self.chr == '8' || self.chr == '9' { + return token.ILLEGAL, self.str[offset:self.chrOffset] + } + goto octal + } + } + + self.scanMantissa(10) + +float: + if self.chr == '.' 
{ + self.read() + self.scanMantissa(10) + } + +exponent: + if self.chr == 'e' || self.chr == 'E' { + self.read() + if self.chr == '-' || self.chr == '+' { + self.read() + } + if isDecimalDigit(self.chr) { + self.read() + self.scanMantissa(10) + } else { + return token.ILLEGAL, self.str[offset:self.chrOffset] + } + } + +hexadecimal: +octal: + if isIdentifierStart(self.chr) || isDecimalDigit(self.chr) { + return token.ILLEGAL, self.str[offset:self.chrOffset] + } + + return tkn, self.str[offset:self.chrOffset] +} diff --git a/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/lexer_test.go b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/lexer_test.go new file mode 100644 index 000000000..37eb7a464 --- /dev/null +++ b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/lexer_test.go @@ -0,0 +1,380 @@ +package parser + +import ( + "../terst" + "testing" + + "github.com/robertkrimen/otto/file" + "github.com/robertkrimen/otto/token" +) + +var tt = terst.Terst +var is = terst.Is + +func TestLexer(t *testing.T) { + tt(t, func() { + setup := func(src string) *_parser { + parser := newParser("", src) + return parser + } + + test := func(src string, test ...interface{}) { + parser := setup(src) + for len(test) > 0 { + tkn, literal, idx := parser.scan() + if len(test) > 0 { + is(tkn, test[0].(token.Token)) + test = test[1:] + } + if len(test) > 0 { + is(literal, test[0].(string)) + test = test[1:] + } + if len(test) > 0 { + // FIXME terst, Fix this so that cast to file.Idx is not necessary? 
+ is(idx, file.Idx(test[0].(int))) + test = test[1:] + } + } + } + + test("", + token.EOF, "", 1, + ) + + test("1", + token.NUMBER, "1", 1, + token.EOF, "", 2, + ) + + test(".0", + token.NUMBER, ".0", 1, + token.EOF, "", 3, + ) + + test("abc", + token.IDENTIFIER, "abc", 1, + token.EOF, "", 4, + ) + + test("abc(1)", + token.IDENTIFIER, "abc", 1, + token.LEFT_PARENTHESIS, "", 4, + token.NUMBER, "1", 5, + token.RIGHT_PARENTHESIS, "", 6, + token.EOF, "", 7, + ) + + test(".", + token.PERIOD, "", 1, + token.EOF, "", 2, + ) + + test("===.", + token.STRICT_EQUAL, "", 1, + token.PERIOD, "", 4, + token.EOF, "", 5, + ) + + test(">>>=.0", + token.UNSIGNED_SHIFT_RIGHT_ASSIGN, "", 1, + token.NUMBER, ".0", 5, + token.EOF, "", 7, + ) + + test(">>>=0.0.", + token.UNSIGNED_SHIFT_RIGHT_ASSIGN, "", 1, + token.NUMBER, "0.0", 5, + token.PERIOD, "", 8, + token.EOF, "", 9, + ) + + test("\"abc\"", + token.STRING, "\"abc\"", 1, + token.EOF, "", 6, + ) + + test("abc = //", + token.IDENTIFIER, "abc", 1, + token.ASSIGN, "", 5, + token.EOF, "", 9, + ) + + test("abc = 1 / 2", + token.IDENTIFIER, "abc", 1, + token.ASSIGN, "", 5, + token.NUMBER, "1", 7, + token.SLASH, "", 9, + token.NUMBER, "2", 11, + token.EOF, "", 12, + ) + + test("xyzzy = 'Nothing happens.'", + token.IDENTIFIER, "xyzzy", 1, + token.ASSIGN, "", 7, + token.STRING, "'Nothing happens.'", 9, + token.EOF, "", 27, + ) + + test("abc = !false", + token.IDENTIFIER, "abc", 1, + token.ASSIGN, "", 5, + token.NOT, "", 7, + token.BOOLEAN, "false", 8, + token.EOF, "", 13, + ) + + test("abc = !!true", + token.IDENTIFIER, "abc", 1, + token.ASSIGN, "", 5, + token.NOT, "", 7, + token.NOT, "", 8, + token.BOOLEAN, "true", 9, + token.EOF, "", 13, + ) + + test("abc *= 1", + token.IDENTIFIER, "abc", 1, + token.MULTIPLY_ASSIGN, "", 5, + token.NUMBER, "1", 8, + token.EOF, "", 9, + ) + + test("if 1 else", + token.IF, "if", 1, + token.NUMBER, "1", 4, + token.ELSE, "else", 6, + token.EOF, "", 10, + ) + + test("null", + token.NULL, "null", 1, + token.EOF, 
"", 5, + ) + + test(`"\u007a\x79\u000a\x78"`, + token.STRING, "\"\\u007a\\x79\\u000a\\x78\"", 1, + token.EOF, "", 23, + ) + + test(`"[First line \ +Second line \ + Third line\ +. ]" + `, + token.STRING, "\"[First line \\\nSecond line \\\n Third line\\\n. ]\"", 1, + token.EOF, "", 53, + ) + + test("/", + token.SLASH, "", 1, + token.EOF, "", 2, + ) + + test("var abc = \"abc\uFFFFabc\"", + token.VAR, "var", 1, + token.IDENTIFIER, "abc", 5, + token.ASSIGN, "", 9, + token.STRING, "\"abc\uFFFFabc\"", 11, + token.EOF, "", 22, + ) + + test(`'\t' === '\r'`, + token.STRING, "'\\t'", 1, + token.STRICT_EQUAL, "", 6, + token.STRING, "'\\r'", 10, + token.EOF, "", 14, + ) + + test(`var \u0024 = 1`, + token.VAR, "var", 1, + token.IDENTIFIER, "$", 5, + token.ASSIGN, "", 12, + token.NUMBER, "1", 14, + token.EOF, "", 15, + ) + + test("10e10000", + token.NUMBER, "10e10000", 1, + token.EOF, "", 9, + ) + + test(`var if var class`, + token.VAR, "var", 1, + token.IF, "if", 5, + token.VAR, "var", 8, + token.KEYWORD, "class", 12, + token.EOF, "", 17, + ) + + test(`-0`, + token.MINUS, "", 1, + token.NUMBER, "0", 2, + token.EOF, "", 3, + ) + + test(`.01`, + token.NUMBER, ".01", 1, + token.EOF, "", 4, + ) + + test(`.01e+2`, + token.NUMBER, ".01e+2", 1, + token.EOF, "", 7, + ) + + test(";", + token.SEMICOLON, "", 1, + token.EOF, "", 2, + ) + + test(";;", + token.SEMICOLON, "", 1, + token.SEMICOLON, "", 2, + token.EOF, "", 3, + ) + + test("//", + token.EOF, "", 3, + ) + + test(";;//", + token.SEMICOLON, "", 1, + token.SEMICOLON, "", 2, + token.EOF, "", 5, + ) + + test("1", + token.NUMBER, "1", 1, + ) + + test("12 123", + token.NUMBER, "12", 1, + token.NUMBER, "123", 4, + ) + + test("1.2 12.3", + token.NUMBER, "1.2", 1, + token.NUMBER, "12.3", 5, + ) + + test("/ /=", + token.SLASH, "", 1, + token.QUOTIENT_ASSIGN, "", 3, + ) + + test(`"abc"`, + token.STRING, `"abc"`, 1, + ) + + test(`'abc'`, + token.STRING, `'abc'`, 1, + ) + + test("++", + token.INCREMENT, "", 1, + ) + + test(">", + token.GREATER, 
"", 1, + ) + + test(">=", + token.GREATER_OR_EQUAL, "", 1, + ) + + test(">>", + token.SHIFT_RIGHT, "", 1, + ) + + test(">>=", + token.SHIFT_RIGHT_ASSIGN, "", 1, + ) + + test(">>>", + token.UNSIGNED_SHIFT_RIGHT, "", 1, + ) + + test(">>>=", + token.UNSIGNED_SHIFT_RIGHT_ASSIGN, "", 1, + ) + + test("1 \"abc\"", + token.NUMBER, "1", 1, + token.STRING, "\"abc\"", 3, + ) + + test(",", + token.COMMA, "", 1, + ) + + test("1, \"abc\"", + token.NUMBER, "1", 1, + token.COMMA, "", 2, + token.STRING, "\"abc\"", 4, + ) + + test("new abc(1, 3.14159);", + token.NEW, "new", 1, + token.IDENTIFIER, "abc", 5, + token.LEFT_PARENTHESIS, "", 8, + token.NUMBER, "1", 9, + token.COMMA, "", 10, + token.NUMBER, "3.14159", 12, + token.RIGHT_PARENTHESIS, "", 19, + token.SEMICOLON, "", 20, + ) + + test("1 == \"1\"", + token.NUMBER, "1", 1, + token.EQUAL, "", 3, + token.STRING, "\"1\"", 6, + ) + + test("1\n[]\n", + token.NUMBER, "1", 1, + token.LEFT_BRACKET, "", 3, + token.RIGHT_BRACKET, "", 4, + ) + + test("1\ufeff[]\ufeff", + token.NUMBER, "1", 1, + token.LEFT_BRACKET, "", 5, + token.RIGHT_BRACKET, "", 6, + ) + + // ILLEGAL + + test(`3ea`, + token.ILLEGAL, "3e", 1, + token.IDENTIFIER, "a", 3, + token.EOF, "", 4, + ) + + test(`3in`, + token.ILLEGAL, "3", 1, + token.IN, "in", 2, + token.EOF, "", 4, + ) + + test("\"Hello\nWorld\"", + token.ILLEGAL, "", 1, + token.IDENTIFIER, "World", 8, + token.ILLEGAL, "", 13, + token.EOF, "", 14, + ) + + test("\u203f = 10", + token.ILLEGAL, "", 1, + token.ASSIGN, "", 5, + token.NUMBER, "10", 7, + token.EOF, "", 9, + ) + + test(`"\x0G"`, + token.STRING, "\"\\x0G\"", 1, + token.EOF, "", 7, + ) + + }) +} diff --git a/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/marshal_test.go b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/marshal_test.go new file mode 100644 index 000000000..f54cd2d4f --- /dev/null +++ b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/marshal_test.go @@ -0,0 +1,930 @@ +package parser + +import ( + "bytes" + 
"encoding/json" + "fmt" + "os" + "reflect" + "strings" + "testing" + + "github.com/robertkrimen/otto/ast" +) + +func marshal(name string, children ...interface{}) interface{} { + if len(children) == 1 { + if name == "" { + return testMarshalNode(children[0]) + } + return map[string]interface{}{ + name: children[0], + } + } + map_ := map[string]interface{}{} + length := len(children) / 2 + for i := 0; i < length; i++ { + name := children[i*2].(string) + value := children[i*2+1] + map_[name] = value + } + if name == "" { + return map_ + } + return map[string]interface{}{ + name: map_, + } +} + +func testMarshalNode(node interface{}) interface{} { + switch node := node.(type) { + + // Expression + + case *ast.ArrayLiteral: + return marshal("Array", testMarshalNode(node.Value)) + + case *ast.AssignExpression: + return marshal("Assign", + "Left", testMarshalNode(node.Left), + "Right", testMarshalNode(node.Right), + ) + + case *ast.BinaryExpression: + return marshal("BinaryExpression", + "Operator", node.Operator.String(), + "Left", testMarshalNode(node.Left), + "Right", testMarshalNode(node.Right), + ) + + case *ast.BooleanLiteral: + return marshal("Literal", node.Value) + + case *ast.CallExpression: + return marshal("Call", + "Callee", testMarshalNode(node.Callee), + "ArgumentList", testMarshalNode(node.ArgumentList), + ) + + case *ast.ConditionalExpression: + return marshal("Conditional", + "Test", testMarshalNode(node.Test), + "Consequent", testMarshalNode(node.Consequent), + "Alternate", testMarshalNode(node.Alternate), + ) + + case *ast.DotExpression: + return marshal("Dot", + "Left", testMarshalNode(node.Left), + "Member", node.Identifier.Name, + ) + + case *ast.NewExpression: + return marshal("New", + "Callee", testMarshalNode(node.Callee), + "ArgumentList", testMarshalNode(node.ArgumentList), + ) + + case *ast.NullLiteral: + return marshal("Literal", nil) + + case *ast.NumberLiteral: + return marshal("Literal", node.Value) + + case *ast.ObjectLiteral: + return 
marshal("Object", testMarshalNode(node.Value)) + + case *ast.RegExpLiteral: + return marshal("Literal", node.Literal) + + case *ast.StringLiteral: + return marshal("Literal", node.Literal) + + case *ast.VariableExpression: + return []interface{}{node.Name, testMarshalNode(node.Initializer)} + + // Statement + + case *ast.Program: + return testMarshalNode(node.Body) + + case *ast.BlockStatement: + return marshal("BlockStatement", testMarshalNode(node.List)) + + case *ast.EmptyStatement: + return "EmptyStatement" + + case *ast.ExpressionStatement: + return testMarshalNode(node.Expression) + + case *ast.ForInStatement: + return marshal("ForIn", + "Into", marshal("", node.Into), + "Source", marshal("", node.Source), + "Body", marshal("", node.Body), + ) + + case *ast.FunctionLiteral: + return marshal("Function", testMarshalNode(node.Body)) + + case *ast.Identifier: + return marshal("Identifier", node.Name) + + case *ast.IfStatement: + if_ := marshal("", + "Test", testMarshalNode(node.Test), + "Consequent", testMarshalNode(node.Consequent), + ).(map[string]interface{}) + if node.Alternate != nil { + if_["Alternate"] = testMarshalNode(node.Alternate) + } + return marshal("If", if_) + + case *ast.LabelledStatement: + return marshal("Label", + "Name", node.Label.Name, + "Statement", testMarshalNode(node.Statement), + ) + case ast.Property: + return marshal("", + "Key", node.Key, + "Value", testMarshalNode(node.Value), + ) + + case *ast.ReturnStatement: + return marshal("Return", testMarshalNode(node.Argument)) + + case *ast.SequenceExpression: + return marshal("Sequence", testMarshalNode(node.Sequence)) + + case *ast.ThrowStatement: + return marshal("Throw", testMarshalNode(node.Argument)) + + case *ast.VariableStatement: + return marshal("Var", testMarshalNode(node.List)) + + } + + { + value := reflect.ValueOf(node) + if value.Kind() == reflect.Slice { + tmp0 := []interface{}{} + for index := 0; index < value.Len(); index++ { + tmp0 = append(tmp0, 
testMarshalNode(value.Index(index).Interface())) + } + return tmp0 + } + } + + if node != nil { + fmt.Fprintf(os.Stderr, "testMarshalNode(%T)\n", node) + } + + return nil +} + +func testMarshal(node interface{}) string { + value, err := json.Marshal(testMarshalNode(node)) + if err != nil { + panic(err) + } + return string(value) +} + +func TestParserAST(t *testing.T) { + tt(t, func() { + + test := func(inputOutput string) { + match := matchBeforeAfterSeparator.FindStringIndex(inputOutput) + input := strings.TrimSpace(inputOutput[0:match[0]]) + wantOutput := strings.TrimSpace(inputOutput[match[1]:]) + _, program, err := testParse(input) + is(err, nil) + haveOutput := testMarshal(program) + tmp0, tmp1 := bytes.Buffer{}, bytes.Buffer{} + json.Indent(&tmp0, []byte(haveOutput), "\t\t", " ") + json.Indent(&tmp1, []byte(wantOutput), "\t\t", " ") + is("\n\t\t"+tmp0.String(), "\n\t\t"+tmp1.String()) + } + + test(` + --- +[] + `) + + test(` + ; + --- +[ + "EmptyStatement" +] + `) + + test(` + ;;; + --- +[ + "EmptyStatement", + "EmptyStatement", + "EmptyStatement" +] + `) + + test(` + 1; true; abc; "abc"; null; + --- +[ + { + "Literal": 1 + }, + { + "Literal": true + }, + { + "Identifier": "abc" + }, + { + "Literal": "\"abc\"" + }, + { + "Literal": null + } +] + `) + + test(` + { 1; null; 3.14159; ; } + --- +[ + { + "BlockStatement": [ + { + "Literal": 1 + }, + { + "Literal": null + }, + { + "Literal": 3.14159 + }, + "EmptyStatement" + ] + } +] + `) + + test(` + new abc(); + --- +[ + { + "New": { + "ArgumentList": [], + "Callee": { + "Identifier": "abc" + } + } + } +] + `) + + test(` + new abc(1, 3.14159) + --- +[ + { + "New": { + "ArgumentList": [ + { + "Literal": 1 + }, + { + "Literal": 3.14159 + } + ], + "Callee": { + "Identifier": "abc" + } + } + } +] + `) + + test(` + true ? 
false : true + --- +[ + { + "Conditional": { + "Alternate": { + "Literal": true + }, + "Consequent": { + "Literal": false + }, + "Test": { + "Literal": true + } + } + } +] + `) + + test(` + true || false + --- +[ + { + "BinaryExpression": { + "Left": { + "Literal": true + }, + "Operator": "||", + "Right": { + "Literal": false + } + } + } +] + `) + + test(` + 0 + { abc: true } + --- +[ + { + "BinaryExpression": { + "Left": { + "Literal": 0 + }, + "Operator": "+", + "Right": { + "Object": [ + { + "Key": "abc", + "Value": { + "Literal": true + } + } + ] + } + } + } +] + `) + + test(` + 1 == "1" + --- +[ + { + "BinaryExpression": { + "Left": { + "Literal": 1 + }, + "Operator": "==", + "Right": { + "Literal": "\"1\"" + } + } + } +] + `) + + test(` + abc(1) + --- +[ + { + "Call": { + "ArgumentList": [ + { + "Literal": 1 + } + ], + "Callee": { + "Identifier": "abc" + } + } + } +] + `) + + test(` + Math.pow(3, 2) + --- +[ + { + "Call": { + "ArgumentList": [ + { + "Literal": 3 + }, + { + "Literal": 2 + } + ], + "Callee": { + "Dot": { + "Left": { + "Identifier": "Math" + }, + "Member": "pow" + } + } + } + } +] + `) + + test(` + 1, 2, 3 + --- +[ + { + "Sequence": [ + { + "Literal": 1 + }, + { + "Literal": 2 + }, + { + "Literal": 3 + } + ] + } +] + `) + + test(` + / abc / gim; + --- +[ + { + "Literal": "/ abc / gim" + } +] + `) + + test(` + if (0) + 1; + --- +[ + { + "If": { + "Consequent": { + "Literal": 1 + }, + "Test": { + "Literal": 0 + } + } + } +] + `) + + test(` + 0+function(){ + return; + } + --- +[ + { + "BinaryExpression": { + "Left": { + "Literal": 0 + }, + "Operator": "+", + "Right": { + "Function": { + "BlockStatement": [ + { + "Return": null + } + ] + } + } + } + } +] + `) + + test(` + xyzzy // Ignore it + // Ignore this + // And this + /* And all.. + + + + ... of this! + */ + "Nothing happens." 
+ // And finally this + --- +[ + { + "Identifier": "xyzzy" + }, + { + "Literal": "\"Nothing happens.\"" + } +] + `) + + test(` + ((x & (x = 1)) !== 0) + --- +[ + { + "BinaryExpression": { + "Left": { + "BinaryExpression": { + "Left": { + "Identifier": "x" + }, + "Operator": "\u0026", + "Right": { + "Assign": { + "Left": { + "Identifier": "x" + }, + "Right": { + "Literal": 1 + } + } + } + } + }, + "Operator": "!==", + "Right": { + "Literal": 0 + } + } + } +] + `) + + test(` + { abc: 'def' } + --- +[ + { + "BlockStatement": [ + { + "Label": { + "Name": "abc", + "Statement": { + "Literal": "'def'" + } + } + } + ] + } +] + `) + + test(` + // This is not an object, this is a string literal with a label! + ({ abc: 'def' }) + --- +[ + { + "Object": [ + { + "Key": "abc", + "Value": { + "Literal": "'def'" + } + } + ] + } +] + `) + + test(` + [,] + --- +[ + { + "Array": [ + null + ] + } +] + `) + + test(` + [,,] + --- +[ + { + "Array": [ + null, + null + ] + } +] + `) + + test(` + ({ get abc() {} }) + --- +[ + { + "Object": [ + { + "Key": "abc", + "Value": { + "Function": { + "BlockStatement": [] + } + } + } + ] + } +] + `) + + test(` + /abc/.source + --- +[ + { + "Dot": { + "Left": { + "Literal": "/abc/" + }, + "Member": "source" + } + } +] + `) + + test(` + xyzzy + + throw new TypeError("Nothing happens.") + --- +[ + { + "Identifier": "xyzzy" + }, + { + "Throw": { + "New": { + "ArgumentList": [ + { + "Literal": "\"Nothing happens.\"" + } + ], + "Callee": { + "Identifier": "TypeError" + } + } + } + } +] + `) + + // When run, this will call a type error to be thrown + // This is essentially the same as: + // + // var abc = 1(function(){})() + // + test(` + var abc = 1 + (function(){ + })() + --- +[ + { + "Var": [ + [ + "abc", + { + "Call": { + "ArgumentList": [], + "Callee": { + "Call": { + "ArgumentList": [ + { + "Function": { + "BlockStatement": [] + } + } + ], + "Callee": { + "Literal": 1 + } + } + } + } + } + ] + ] + } +] + `) + + test(` + "use strict" + --- +[ + { + 
"Literal": "\"use strict\"" + } +] + `) + + test(` + "use strict" + abc = 1 + 2 + 11 + --- +[ + { + "Literal": "\"use strict\"" + }, + { + "Assign": { + "Left": { + "Identifier": "abc" + }, + "Right": { + "BinaryExpression": { + "Left": { + "BinaryExpression": { + "Left": { + "Literal": 1 + }, + "Operator": "+", + "Right": { + "Literal": 2 + } + } + }, + "Operator": "+", + "Right": { + "Literal": 11 + } + } + } + } + } +] + `) + + test(` + abc = function() { 'use strict' } + --- +[ + { + "Assign": { + "Left": { + "Identifier": "abc" + }, + "Right": { + "Function": { + "BlockStatement": [ + { + "Literal": "'use strict'" + } + ] + } + } + } + } +] + `) + + test(` + for (var abc in def) { + } + --- +[ + { + "ForIn": { + "Body": { + "BlockStatement": [] + }, + "Into": [ + "abc", + null + ], + "Source": { + "Identifier": "def" + } + } + } +] + `) + + test(` + abc = { + '"': "'", + "'": '"', + } + --- +[ + { + "Assign": { + "Left": { + "Identifier": "abc" + }, + "Right": { + "Object": [ + { + "Key": "\"", + "Value": { + "Literal": "\"'\"" + } + }, + { + "Key": "'", + "Value": { + "Literal": "'\"'" + } + } + ] + } + } + } +] + `) + + return + + test(` + if (!abc && abc.jkl(def) && abc[0] === +abc[0] && abc.length < ghi) { + } + --- +[ + { + "If": { + "Consequent": { + "BlockStatement": [] + }, + "Test": { + "BinaryExpression": { + "Left": { + "BinaryExpression": { + "Left": { + "BinaryExpression": { + "Left": null, + "Operator": "\u0026\u0026", + "Right": { + "Call": { + "ArgumentList": [ + { + "Identifier": "def" + } + ], + "Callee": { + "Dot": { + "Left": { + "Identifier": "abc" + }, + "Member": "jkl" + } + } + } + } + } + }, + "Operator": "\u0026\u0026", + "Right": { + "BinaryExpression": { + "Left": null, + "Operator": "===", + "Right": null + } + } + } + }, + "Operator": "\u0026\u0026", + "Right": { + "BinaryExpression": { + "Left": { + "Dot": { + "Left": { + "Identifier": "abc" + }, + "Member": "length" + } + }, + "Operator": "\u003c", + "Right": { + "Identifier": 
"ghi" + } + } + } + } + } + } + } +] + `) + }) +} diff --git a/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/parser.go b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/parser.go new file mode 100644 index 000000000..1536344d7 --- /dev/null +++ b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/parser.go @@ -0,0 +1,273 @@ +/* +Package parser implements a parser for JavaScript. + + import ( + "github.com/robertkrimen/otto/parser" + ) + +Parse and return an AST + + filename := "" // A filename is optional + src := ` + // Sample xyzzy example + (function(){ + if (3.14159 > 0) { + console.log("Hello, World."); + return; + } + + var xyzzy = NaN; + console.log("Nothing happens."); + return xyzzy; + })(); + ` + + // Parse some JavaScript, yielding a *ast.Program and/or an ErrorList + program, err := parser.ParseFile(nil, filename, src, 0) + +Warning + +The parser and AST interfaces are still works-in-progress (particularly where +node types are concerned) and may change in the future. + +*/ +package parser + +import ( + "bytes" + "errors" + "io" + "io/ioutil" + + "github.com/robertkrimen/otto/ast" + "github.com/robertkrimen/otto/file" + "github.com/robertkrimen/otto/token" +) + +// A Mode value is a set of flags (or 0). They control optional parser functionality. 
// ReadSource returns the raw bytes of a JavaScript source.
//
// If src is nil, the file named by filename is read from disk. Otherwise src
// must be a string, a []byte, a non-nil *bytes.Buffer, or an io.Reader (which
// is drained completely). Any other value, including a nil *bytes.Buffer,
// produces an "invalid source" error.
func ReadSource(filename string, src interface{}) ([]byte, error) {
	// No in-memory source was supplied: fall back to the file system.
	if src == nil {
		return ioutil.ReadFile(filename)
	}

	switch source := src.(type) {
	case string:
		return []byte(source), nil
	case []byte:
		return source, nil
	case *bytes.Buffer:
		if source == nil {
			// A typed nil buffer is rejected below as an invalid source.
			break
		}
		return source.Bytes(), nil
	case io.Reader:
		var buffer bytes.Buffer
		_, err := io.Copy(&buffer, source)
		if err != nil {
			return nil, err
		}
		return buffer.Bytes(), nil
	}

	return nil, errors.New("invalid source")
}
+// +// The filename argument is optional and is used for labelling errors, etc. +// +// src may be a string, a byte slice, a bytes.Buffer, or an io.Reader, but it MUST always be in UTF-8. +// +// // Parse some JavaScript, yielding a *ast.Program and/or an ErrorList +// program, err := parser.ParseFile(nil, "", `if (abc > 1) {}`, 0) +// +func ParseFile(fileSet *file.FileSet, filename string, src interface{}, mode Mode) (*ast.Program, error) { + str, err := ReadSource(filename, src) + if err != nil { + return nil, err + } + { + str := string(str) + + base := 1 + if fileSet != nil { + base = fileSet.AddFile(filename, str) + } + + parser := _newParser(filename, str, base) + parser.mode = mode + return parser.parse() + } +} + +// ParseFunction parses a given parameter list and body as a function and returns the +// corresponding ast.FunctionLiteral node. +// +// The parameter list, if any, should be a comma-separated list of identifiers. +// +func ParseFunction(parameterList, body string) (*ast.FunctionLiteral, error) { + + src := "(function(" + parameterList + ") {\n" + body + "\n})" + + parser := _newParser("", src, 1) + program, err := parser.parse() + if err != nil { + return nil, err + } + + return program.Body[0].(*ast.ExpressionStatement).Expression.(*ast.FunctionLiteral), nil +} + +func (self *_parser) slice(idx0, idx1 file.Idx) string { + from := int(idx0) - self.base + to := int(idx1) - self.base + if from >= 0 && to <= len(self.str) { + return self.str[from:to] + } + + return "" +} + +func (self *_parser) parse() (*ast.Program, error) { + self.next() + program := self.parseProgram() + if false { + self.errors.Sort() + } + return program, self.errors.Err() +} + +func (self *_parser) next() { + self.token, self.literal, self.idx = self.scan() +} + +func (self *_parser) optionalSemicolon() { + if self.token == token.SEMICOLON { + self.next() + return + } + + if self.implicitSemicolon { + self.implicitSemicolon = false + return + } + + if self.token != token.EOF 
// lineCount scans str for line terminators (\r, \n, \r\n, U+2028, U+2029).
//
// It returns the number of terminators found and the byte index of the last
// byte of the final terminator, or -1 if str contains none. A \r immediately
// followed by \n counts as a single terminator.
func lineCount(str string) (int, int) {
	lines := 0
	lastTerminator := -1
	afterCR := false
	for offset, r := range str {
		// Remember whether the previous rune was a carriage return, then
		// reset the flag; only the '\r' case below sets it again.
		wasCR := afterCR
		afterCR = false

		switch {
		case r == '\r':
			lines++
			lastTerminator = offset
			afterCR = true
		case r == '\n':
			if !wasCR {
				lines++
			}
			lastTerminator = offset
		case r == '\u2028' || r == '\u2029':
			lines++
			// These separators occupy three bytes in UTF-8, so the
			// terminator ends two bytes after it starts.
			lastTerminator = offset + 2
		}
	}
	return lines, lastTerminator
}
string) (parser *_parser, program *ast.Program, err error) { + defer func() { + if tmp := recover(); tmp != nil { + switch tmp := tmp.(type) { + case string: + if strings.HasPrefix(tmp, "SyntaxError:") { + parser = nil + program = nil + err = errors.New(tmp) + return + } + } + panic(tmp) + } + }() + parser = newParser("", src) + program, err = parser.parse() + return +} + +func TestParseFile(t *testing.T) { + tt(t, func() { + _, err := ParseFile(nil, "", `/abc/`, 0) + is(err, nil) + + _, err = ParseFile(nil, "", `/(?!def)abc/`, IgnoreRegExpErrors) + is(err, nil) + + _, err = ParseFile(nil, "", `/(?!def)abc/`, 0) + is(err, "(anonymous): Line 1:1 Invalid regular expression: re2: Invalid (?!) <lookahead>") + + _, err = ParseFile(nil, "", `/(?!def)abc/; return`, IgnoreRegExpErrors) + is(err, "(anonymous): Line 1:15 Illegal return statement") + }) +} + +func TestParseFunction(t *testing.T) { + tt(t, func() { + test := func(prm, bdy string, expect interface{}) *ast.FunctionLiteral { + function, err := ParseFunction(prm, bdy) + is(firstErr(err), expect) + return function + } + + test("a, b,c,d", "", nil) + + test("a, b;,c,d", "", "(anonymous): Line 1:15 Unexpected token ;") + + test("this", "", "(anonymous): Line 1:11 Unexpected token this") + + test("a, b, c, null", "", "(anonymous): Line 1:20 Unexpected token null") + + test("a, b,c,d", "return;", nil) + + test("a, b,c,d", "break;", "(anonymous): Line 2:1 Illegal break statement") + + test("a, b,c,d", "{}", nil) + }) +} + +func TestParserErr(t *testing.T) { + tt(t, func() { + test := func(input string, expect interface{}) (*ast.Program, *_parser) { + parser := newParser("", input) + program, err := parser.parse() + is(firstErr(err), expect) + return program, parser + } + + program, parser := test("", nil) + + program, parser = test(` + var abc; + break; do { + } while(true); + `, "(anonymous): Line 3:9 Illegal break statement") + { + stmt := program.Body[1].(*ast.BadStatement) + is(parser.position(stmt.From).Column, 9) 
+ is(parser.position(stmt.To).Column, 16) + is(parser.slice(stmt.From, stmt.To), "break; ") + } + + test("{", "(anonymous): Line 1:2 Unexpected end of input") + + test("}", "(anonymous): Line 1:1 Unexpected token }") + + test("3ea", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("3in", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("3in []", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("3e", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("3e+", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("3e-", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("3x", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("3x0", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("0x", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("09", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("018", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("01.0", "(anonymous): Line 1:3 Unexpected number") + + test("01a", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("0x3in[]", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("\"Hello\nWorld\"", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("\u203f = 10", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("x\\", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("x\\\\", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("x\\u005c", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("x\\u002a", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("x\\\\u002a", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("/\n", "(anonymous): Line 1:1 Invalid regular expression: missing /") + + test("var x = /(s/g", "(anonymous): Line 1:9 Invalid regular expression: Unterminated group") + + test("0 = 1", "(anonymous): Line 1:1 Invalid left-hand side in assignment") + + test("func() = 1", "(anonymous): Line 1:1 Invalid left-hand side in assignment") + + test("(1 + 1) 
= 2", "(anonymous): Line 1:2 Invalid left-hand side in assignment") + + test("1++", "(anonymous): Line 1:2 Invalid left-hand side in assignment") + + test("1--", "(anonymous): Line 1:2 Invalid left-hand side in assignment") + + test("--1", "(anonymous): Line 1:1 Invalid left-hand side in assignment") + + test("for((1 + 1) in abc) def();", "(anonymous): Line 1:1 Invalid left-hand side in for-in") + + test("[", "(anonymous): Line 1:2 Unexpected end of input") + + test("[,", "(anonymous): Line 1:3 Unexpected end of input") + + test("1 + {", "(anonymous): Line 1:6 Unexpected end of input") + + test("1 + { abc:abc", "(anonymous): Line 1:14 Unexpected end of input") + + test("1 + { abc:abc,", "(anonymous): Line 1:15 Unexpected end of input") + + test("var abc = /\n/", "(anonymous): Line 1:11 Invalid regular expression: missing /") + + test("var abc = \"\n", "(anonymous): Line 1:11 Unexpected token ILLEGAL") + + test("var if = 0", "(anonymous): Line 1:5 Unexpected token if") + + test("abc + 0 = 1", "(anonymous): Line 1:1 Invalid left-hand side in assignment") + + test("+abc = 1", "(anonymous): Line 1:1 Invalid left-hand side in assignment") + + test("1 + (", "(anonymous): Line 1:6 Unexpected end of input") + + test("\n\n\n{", "(anonymous): Line 4:2 Unexpected end of input") + + test("\n/* Some multiline\ncomment */\n)", "(anonymous): Line 4:1 Unexpected token )") + + // TODO + //{ set 1 } + //{ get 2 } + //({ set: s(if) { } }) + //({ set s(.) 
{ } }) + //({ set: s() { } }) + //({ set: s(a, b) { } }) + //({ get: g(d) { } }) + //({ get i() { }, i: 42 }) + //({ i: 42, get i() { } }) + //({ set i(x) { }, i: 42 }) + //({ i: 42, set i(x) { } }) + //({ get i() { }, get i() { } }) + //({ set i(x) { }, set i(x) { } }) + + test("function abc(if) {}", "(anonymous): Line 1:14 Unexpected token if") + + test("function abc(true) {}", "(anonymous): Line 1:14 Unexpected token true") + + test("function abc(false) {}", "(anonymous): Line 1:14 Unexpected token false") + + test("function abc(null) {}", "(anonymous): Line 1:14 Unexpected token null") + + test("function null() {}", "(anonymous): Line 1:10 Unexpected token null") + + test("function true() {}", "(anonymous): Line 1:10 Unexpected token true") + + test("function false() {}", "(anonymous): Line 1:10 Unexpected token false") + + test("function if() {}", "(anonymous): Line 1:10 Unexpected token if") + + test("a b;", "(anonymous): Line 1:3 Unexpected identifier") + + test("if.a", "(anonymous): Line 1:3 Unexpected token .") + + test("a if", "(anonymous): Line 1:3 Unexpected token if") + + test("a class", "(anonymous): Line 1:3 Unexpected reserved word") + + test("break\n", "(anonymous): Line 1:1 Illegal break statement") + + test("break 1;", "(anonymous): Line 1:7 Unexpected number") + + test("for (;;) { break 1; }", "(anonymous): Line 1:18 Unexpected number") + + test("continue\n", "(anonymous): Line 1:1 Illegal continue statement") + + test("continue 1;", "(anonymous): Line 1:10 Unexpected number") + + test("for (;;) { continue 1; }", "(anonymous): Line 1:21 Unexpected number") + + test("throw", "(anonymous): Line 1:1 Unexpected end of input") + + test("throw;", "(anonymous): Line 1:6 Unexpected token ;") + + test("throw \n", "(anonymous): Line 1:1 Unexpected end of input") + + test("for (var abc, def in {});", "(anonymous): Line 1:19 Unexpected token in") + + test("for ((abc in {});;);", nil) + + test("for ((abc in {}));", "(anonymous): Line 1:17 Unexpected token 
)") + + test("for (+abc in {});", "(anonymous): Line 1:1 Invalid left-hand side in for-in") + + test("if (false)", "(anonymous): Line 1:11 Unexpected end of input") + + test("if (false) abc(); else", "(anonymous): Line 1:23 Unexpected end of input") + + test("do", "(anonymous): Line 1:3 Unexpected end of input") + + test("while (false)", "(anonymous): Line 1:14 Unexpected end of input") + + test("for (;;)", "(anonymous): Line 1:9 Unexpected end of input") + + test("with (abc)", "(anonymous): Line 1:11 Unexpected end of input") + + test("try {}", "(anonymous): Line 1:1 Missing catch or finally after try") + + test("try {} catch {}", "(anonymous): Line 1:14 Unexpected token {") + + test("try {} catch () {}", "(anonymous): Line 1:15 Unexpected token )") + + test("\u203f = 1", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + // TODO + // const x = 12, y; + // const x, y = 12; + // const x; + // if(true) let a = 1; + // if(true) const a = 1; + + test(`new abc()."def"`, "(anonymous): Line 1:11 Unexpected string") + + test("/*", "(anonymous): Line 1:3 Unexpected end of input") + + test("/**", "(anonymous): Line 1:4 Unexpected end of input") + + test("/*\n\n\n", "(anonymous): Line 4:1 Unexpected end of input") + + test("/*\n\n\n*", "(anonymous): Line 4:2 Unexpected end of input") + + test("/*abc", "(anonymous): Line 1:6 Unexpected end of input") + + test("/*abc *", "(anonymous): Line 1:9 Unexpected end of input") + + test("\n]", "(anonymous): Line 2:1 Unexpected token ]") + + test("\r\n]", "(anonymous): Line 2:1 Unexpected token ]") + + test("\n\r]", "(anonymous): Line 3:1 Unexpected token ]") + + test("//\r\n]", "(anonymous): Line 2:1 Unexpected token ]") + + test("//\n\r]", "(anonymous): Line 3:1 Unexpected token ]") + + test("/abc\\\n/", "(anonymous): Line 1:1 Invalid regular expression: missing /") + + test("//\r \n]", "(anonymous): Line 3:1 Unexpected token ]") + + test("/*\r\n*/]", "(anonymous): Line 2:3 Unexpected token ]") + + test("/*\r \n*/]", "(anonymous): 
Line 3:3 Unexpected token ]") + + test("\\\\", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("\\u005c", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("\\abc", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("\\u0000", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("\\u200c = []", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("\\u200D = []", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test(`"\`, "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test(`"\u`, "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("return", "(anonymous): Line 1:1 Illegal return statement") + + test("continue", "(anonymous): Line 1:1 Illegal continue statement") + + test("break", "(anonymous): Line 1:1 Illegal break statement") + + test("switch (abc) { default: continue; }", "(anonymous): Line 1:25 Illegal continue statement") + + test("do { abc } *", "(anonymous): Line 1:12 Unexpected token *") + + test("while (true) { break abc; }", "(anonymous): Line 1:16 Undefined label 'abc'") + + test("while (true) { continue abc; }", "(anonymous): Line 1:16 Undefined label 'abc'") + + test("abc: while (true) { (function(){ break abc; }); }", "(anonymous): Line 1:34 Undefined label 'abc'") + + test("abc: while (true) { (function(){ abc: break abc; }); }", nil) + + test("abc: while (true) { (function(){ continue abc; }); }", "(anonymous): Line 1:34 Undefined label 'abc'") + + test(`abc: if (0) break abc; else {}`, nil) + + test(`abc: if (0) { break abc; } else {}`, nil) + + test(`abc: if (0) { break abc } else {}`, nil) + + test("abc: while (true) { abc: while (true) {} }", "(anonymous): Line 1:21 Label 'abc' already exists") + + if false { + // TODO When strict mode is implemented + test("(function () { 'use strict'; delete abc; }())", "") + } + + test("_: _: while (true) {]", "(anonymous): Line 1:4 Label '_' already exists") + + test("_:\n_:\nwhile (true) {]", "(anonymous): Line 2:1 Label '_' already exists") + + 
test("_:\n _:\nwhile (true) {]", "(anonymous): Line 2:4 Label '_' already exists") + + test("/Xyzzy(?!Nothing happens)/", + "(anonymous): Line 1:1 Invalid regular expression: re2: Invalid (?!) <lookahead>") + + test("function(){}", "(anonymous): Line 1:9 Unexpected token (") + + test("\n/*/", "(anonymous): Line 2:4 Unexpected end of input") + + test("/*/.source", "(anonymous): Line 1:11 Unexpected end of input") + + test("/\\1/.source", "(anonymous): Line 1:1 Invalid regular expression: re2: Invalid \\1 <backreference>") + + test("var class", "(anonymous): Line 1:5 Unexpected reserved word") + + test("var if", "(anonymous): Line 1:5 Unexpected token if") + + test("object Object", "(anonymous): Line 1:8 Unexpected identifier") + + test("[object Object]", "(anonymous): Line 1:9 Unexpected identifier") + + test("\\u0xyz", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test(`for (var abc, def in {}) {}`, "(anonymous): Line 1:19 Unexpected token in") + + test(`for (abc, def in {}) {}`, "(anonymous): Line 1:1 Invalid left-hand side in for-in") + + test(`for (var abc=def, ghi=("abc" in {}); true;) {}`, nil) + + { + // Semicolon insertion + + test("this\nif (1);", nil) + + test("while (1) { break\nif (1); }", nil) + + test("throw\nif (1);", "(anonymous): Line 1:1 Illegal newline after throw") + + test("(function(){ return\nif (1); })", nil) + + test("while (1) { continue\nif (1); }", nil) + + test("debugger\nif (1);", nil) + } + + { // Reserved words + + test("class", "(anonymous): Line 1:1 Unexpected reserved word") + test("abc.class = 1", nil) + test("var class;", "(anonymous): Line 1:5 Unexpected reserved word") + + test("const", "(anonymous): Line 1:1 Unexpected reserved word") + test("abc.const = 1", nil) + test("var const;", "(anonymous): Line 1:5 Unexpected reserved word") + + test("enum", "(anonymous): Line 1:1 Unexpected reserved word") + test("abc.enum = 1", nil) + test("var enum;", "(anonymous): Line 1:5 Unexpected reserved word") + + test("export", 
"(anonymous): Line 1:1 Unexpected reserved word") + test("abc.export = 1", nil) + test("var export;", "(anonymous): Line 1:5 Unexpected reserved word") + + test("extends", "(anonymous): Line 1:1 Unexpected reserved word") + test("abc.extends = 1", nil) + test("var extends;", "(anonymous): Line 1:5 Unexpected reserved word") + + test("import", "(anonymous): Line 1:1 Unexpected reserved word") + test("abc.import = 1", nil) + test("var import;", "(anonymous): Line 1:5 Unexpected reserved word") + + test("super", "(anonymous): Line 1:1 Unexpected reserved word") + test("abc.super = 1", nil) + test("var super;", "(anonymous): Line 1:5 Unexpected reserved word") + } + + { // Reserved words (strict) + + test(`implements`, nil) + test(`abc.implements = 1`, nil) + test(`var implements;`, nil) + + test(`interface`, nil) + test(`abc.interface = 1`, nil) + test(`var interface;`, nil) + + test(`let`, nil) + test(`abc.let = 1`, nil) + test(`var let;`, nil) + + test(`package`, nil) + test(`abc.package = 1`, nil) + test(`var package;`, nil) + + test(`private`, nil) + test(`abc.private = 1`, nil) + test(`var private;`, nil) + + test(`protected`, nil) + test(`abc.protected = 1`, nil) + test(`var protected;`, nil) + + test(`public`, nil) + test(`abc.public = 1`, nil) + test(`var public;`, nil) + + test(`static`, nil) + test(`abc.static = 1`, nil) + test(`var static;`, nil) + + test(`yield`, nil) + test(`abc.yield = 1`, nil) + test(`var yield;`, nil) + } + }) +} + +func TestParser(t *testing.T) { + tt(t, func() { + test := func(source string, chk interface{}) *ast.Program { + _, program, err := testParse(source) + is(firstErr(err), chk) + return program + } + + test(` + abc + -- + [] + `, "(anonymous): Line 3:13 Invalid left-hand side in assignment") + + test(` + abc-- + [] + `, nil) + + test("1\n[]\n", "(anonymous): Line 2:2 Unexpected token ]") + + test(` + function abc() { + } + abc() + `, nil) + + program := test("", nil) + + test("//", nil) + + test("/* */", nil) + + test("/** 
**/", nil) + + test("/*****/", nil) + + test("/*", "(anonymous): Line 1:3 Unexpected end of input") + + test("#", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("/**/#", "(anonymous): Line 1:5 Unexpected token ILLEGAL") + + test("new +", "(anonymous): Line 1:5 Unexpected token +") + + program = test(";", nil) + is(len(program.Body), 1) + is(program.Body[0].(*ast.EmptyStatement).Semicolon, file.Idx(1)) + + program = test(";;", nil) + is(len(program.Body), 2) + is(program.Body[0].(*ast.EmptyStatement).Semicolon, file.Idx(1)) + is(program.Body[1].(*ast.EmptyStatement).Semicolon, file.Idx(2)) + + program = test("1.2", nil) + is(len(program.Body), 1) + is(program.Body[0].(*ast.ExpressionStatement).Expression.(*ast.NumberLiteral).Literal, "1.2") + + program = test("/* */1.2", nil) + is(len(program.Body), 1) + is(program.Body[0].(*ast.ExpressionStatement).Expression.(*ast.NumberLiteral).Literal, "1.2") + + program = test("\n", nil) + is(len(program.Body), 0) + + test(` + if (0) { + abc = 0 + } + else abc = 0 + `, nil) + + test("if (0) abc = 0 else abc = 0", "(anonymous): Line 1:16 Unexpected token else") + + test(` + if (0) { + abc = 0 + } else abc = 0 + `, nil) + + test(` + if (0) { + abc = 1 + } else { + } + `, nil) + + test(` + do { + } while (true) + `, nil) + + test(` + try { + } finally { + } + `, nil) + + test(` + try { + } catch (abc) { + } finally { + } + `, nil) + + test(` + try { + } + catch (abc) { + } + finally { + } + `, nil) + + test(`try {} catch (abc) {} finally {}`, nil) + + test(` + do { + do { + } while (0) + } while (0) + `, nil) + + test(` + (function(){ + try { + if ( + 1 + ) { + return 1 + } + return 0 + } finally { + } + })() + `, nil) + + test("abc = ''\ndef", nil) + + test("abc = 1\ndef", nil) + + test("abc = Math\ndef", nil) + + test(`"\'"`, nil) + + test(` + abc = function(){ + } + abc = 0 + `, nil) + + test("abc.null = 0", nil) + + test("0x41", nil) + + test(`"\d"`, nil) + + test(`(function(){return this})`, nil) + + test(` + 
Object.defineProperty(Array.prototype, "0", { + value: 100, + writable: false, + configurable: true + }); + abc = [101]; + abc.hasOwnProperty("0") && abc[0] === 101; + `, nil) + + test(`new abc()`, nil) + test(`new {}`, nil) + + test(` + limit = 4 + result = 0 + while (limit) { + limit = limit - 1 + if (limit) { + } + else { + break + } + result = result + 1 + } + `, nil) + + test(` + while (0) { + if (0) { + continue + } + } + `, nil) + + test("var \u0061\u0062\u0063 = 0", nil) + + // 7_3_1 + test("var test7_3_1\nabc = 66;", nil) + test("var test7_3_1\u2028abc = 66;", nil) + + // 7_3_3 + test("//\u2028 =;", "(anonymous): Line 2:2 Unexpected token =") + + // 7_3_10 + test("var abc = \u2029;", "(anonymous): Line 2:1 Unexpected token ;") + test("var abc = \\u2029;", "(anonymous): Line 1:11 Unexpected token ILLEGAL") + test("var \\u0061\\u0062\\u0063 = 0;", nil) + + test("'", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + test("'\nstr\ning\n'", "(anonymous): Line 1:1 Unexpected token ILLEGAL") + + // S7.6_A4.3_T1 + test(`var $\u0030 = 0;`, nil) + + // S7.6.1.1_A1.1 + test(`switch = 1`, "(anonymous): Line 1:8 Unexpected token =") + + // S7.8.3_A2.1_T1 + test(`.0 === 0.0`, nil) + + // 7.8.5-1 + test("var regExp = /\\\rn/;", "(anonymous): Line 1:14 Invalid regular expression: missing /") + + // S7.8.5_A1.1_T2 + test("var regExp = /=/;", nil) + + // S7.8.5_A1.2_T1 + test("/*/", "(anonymous): Line 1:4 Unexpected end of input") + + // Sbp_7.9_A9_T3 + test(` + do { + ; + } while (false) true + `, nil) + + // S7.9_A10_T10 + test(` + {a:1 + } 3 + `, nil) + + test(` + abc + ++def + `, nil) + + // S7.9_A5.2_T1 + test(` + for(false;false + ) { + break; + } + `, "(anonymous): Line 3:13 Unexpected token )") + + // S7.9_A9_T8 + test(` + do {}; + while (false) + `, "(anonymous): Line 2:18 Unexpected token ;") + + // S8.4_A5 + test(` + "x\0y" + `, nil) + + // S9.3.1_A6_T1 + test(` + 10e10000 + `, nil) + + // 10.4.2-1-5 + test(` + "abc\ + def" + `, nil) + + test("'\\\n'", nil) + 
+ test("'\\\r\n'", nil) + + //// 11.13.1-1-1 + test("42 = 42;", "(anonymous): Line 1:1 Invalid left-hand side in assignment") + + // S11.13.2_A4.2_T1.3 + test(` + abc /= "1" + `, nil) + + // 12.1-1 + test(` + try{};catch(){} + `, "(anonymous): Line 2:13 Missing catch or finally after try") + + // 12.1-3 + test(` + try{};finally{} + `, "(anonymous): Line 2:13 Missing catch or finally after try") + + // S12.6.3_A11.1_T3 + test(` + while (true) { + break abc; + } + `, "(anonymous): Line 3:17 Undefined label 'abc'") + + // S15.3_A2_T1 + test(`var x / = 1;`, "(anonymous): Line 1:7 Unexpected token /") + + test(` + function abc() { + if (0) + return; + else { + } + } + `, nil) + + test("//\u2028 var =;", "(anonymous): Line 2:6 Unexpected token =") + + test(` + throw + {} + `, "(anonymous): Line 2:13 Illegal newline after throw") + + // S7.6.1.1_A1.11 + test(` + function = 1 + `, "(anonymous): Line 2:22 Unexpected token =") + + // S7.8.3_A1.2_T1 + test(`0e1`, nil) + + test("abc = 1; abc\n++", "(anonymous): Line 2:3 Unexpected end of input") + + // --- + + test("({ get abc() {} })", nil) + + test(`for (abc.def in {}) {}`, nil) + + test(`while (true) { break }`, nil) + + test(`while (true) { continue }`, nil) + + test(`abc=/^(?:(\w+:)\/{2}(\w+(?:\.\w+)*\/?)|(.{0,2}\/{1}))?([/.]*?(?:[^?]+)?\/)?((?:[^/?]+)\.(\w+))(?:\?(\S+)?)?$/,def=/^(?:(\w+:)\/{2})|(.{0,2}\/{1})?([/.]*?(?:[^?]+)?\/?)?$/`, nil) + + test(`(function() { try {} catch (err) {} finally {} return })`, nil) + + test(`0xde0b6b3a7640080.toFixed(0)`, nil) + + test(`/[^-._0-9A-Za-z\xb7\xc0-\xd6\xd8-\xf6\xf8-\u037d\u37f-\u1fff\u200c-\u200d\u203f\u2040\u2070-\u218f]/`, nil) + + test(`/[\u0000-\u0008\u000B-\u000C\u000E-\u001F\uD800-\uDFFF\uFFFE-\uFFFF]/`, nil) + + test("var abc = 1;\ufeff", nil) + + test("\ufeff/* var abc = 1; */", nil) + + test(`if (-0x8000000000000000<=abc&&abc<=0x8000000000000000) {}`, nil) + + test(`(function(){debugger;return this;})`, nil) + + test(` + + `, nil) + + test(` + var abc = "" + debugger 
+ `, nil) + + test(` + var abc = /\[\]$/ + debugger + `, nil) + + test(` + var abc = 1 / + 2 + debugger + `, nil) + }) +} + +func Test_parseStringLiteral(t *testing.T) { + tt(t, func() { + test := func(have, want string) { + have, err := parseStringLiteral(have) + is(err, nil) + is(have, want) + } + + test("", "") + + test("1(\\\\d+)", "1(\\d+)") + + test("\\u2029", "\u2029") + + test("abc\\uFFFFabc", "abc\uFFFFabc") + + test("[First line \\\nSecond line \\\n Third line\\\n. ]", + "[First line Second line Third line. ]") + + test("\\u007a\\x79\\u000a\\x78", "zy\nx") + + // S7.8.4_A4.2_T3 + test("\\a", "a") + test("\u0410", "\u0410") + + // S7.8.4_A5.1_T1 + test("\\0", "\u0000") + + // S8.4_A5 + test("\u0000", "\u0000") + + // 15.5.4.20 + test("'abc'\\\n'def'", "'abc''def'") + + // 15.5.4.20-4-1 + test("'abc'\\\r\n'def'", "'abc''def'") + + // Octal + test("\\0", "\000") + test("\\00", "\000") + test("\\000", "\000") + test("\\09", "\0009") + test("\\009", "\0009") + test("\\0009", "\0009") + test("\\1", "\001") + test("\\01", "\001") + test("\\001", "\001") + test("\\0011", "\0011") + test("\\1abc", "\001abc") + + test("\\\u4e16", "\u4e16") + + // err + test = func(have, want string) { + have, err := parseStringLiteral(have) + is(err.Error(), want) + is(have, "") + } + + test(`\u`, `invalid escape: \u: len("") != 4`) + test(`\u0`, `invalid escape: \u: len("0") != 4`) + test(`\u00`, `invalid escape: \u: len("00") != 4`) + test(`\u000`, `invalid escape: \u: len("000") != 4`) + + test(`\x`, `invalid escape: \x: len("") != 2`) + test(`\x0`, `invalid escape: \x: len("0") != 2`) + test(`\x0`, `invalid escape: \x: len("0") != 2`) + }) +} + +func Test_parseNumberLiteral(t *testing.T) { + tt(t, func() { + test := func(input string, expect interface{}) { + result, err := parseNumberLiteral(input) + is(err, nil) + is(result, expect) + } + + test("0", 0) + + test("0x8000000000000000", float64(9.223372036854776e+18)) + }) +} + +func TestPosition(t *testing.T) { + tt(t, func() { 
+ parser := newParser("", "// Lorem ipsum") + + // Out of range, idx0 (error condition) + is(parser.slice(0, 1), "") + is(parser.slice(0, 10), "") + + // Out of range, idx1 (error condition) + is(parser.slice(1, 128), "") + + is(parser.str[0:0], "") + is(parser.slice(1, 1), "") + + is(parser.str[0:1], "/") + is(parser.slice(1, 2), "/") + + is(parser.str[0:14], "// Lorem ipsum") + is(parser.slice(1, 15), "// Lorem ipsum") + + parser = newParser("", "(function(){ return 0; })") + program, err := parser.parse() + is(err, nil) + + var node ast.Node + node = program.Body[0].(*ast.ExpressionStatement).Expression.(*ast.FunctionLiteral) + is(node.Idx0(), file.Idx(2)) + is(node.Idx1(), file.Idx(25)) + is(parser.slice(node.Idx0(), node.Idx1()), "function(){ return 0; }") + is(parser.slice(node.Idx0(), node.Idx1()+1), "function(){ return 0; })") + is(parser.slice(node.Idx0(), node.Idx1()+2), "") + is(node.(*ast.FunctionLiteral).Source, "function(){ return 0; }") + + node = program + is(node.Idx0(), file.Idx(2)) + is(node.Idx1(), file.Idx(25)) + is(parser.slice(node.Idx0(), node.Idx1()), "function(){ return 0; }") + + parser = newParser("", "(function(){ return abc; })") + program, err = parser.parse() + is(err, nil) + node = program.Body[0].(*ast.ExpressionStatement).Expression.(*ast.FunctionLiteral) + is(node.(*ast.FunctionLiteral).Source, "function(){ return abc; }") + }) +} diff --git a/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/regexp.go b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/regexp.go new file mode 100644 index 000000000..f614dae74 --- /dev/null +++ b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/regexp.go @@ -0,0 +1,358 @@ +package parser + +import ( + "bytes" + "fmt" + "strconv" +) + +type _RegExp_parser struct { + str string + length int + + chr rune // The current character + chrOffset int // The offset of current character + offset int // The offset after current character (may be greater than 1) + + errors []error + 
invalid bool // The input is an invalid JavaScript RegExp + + goRegexp *bytes.Buffer +} + +// TransformRegExp transforms a JavaScript pattern into a Go "regexp" pattern. +// +// re2 (Go) cannot do backtracking, so the presence of a lookahead (?=) (?!) or +// backreference (\1, \2, ...) will cause an error. +// +// re2 (Go) has a different definition for \s: [\t\n\f\r ]. +// The JavaScript definition, on the other hand, also includes \v, Unicode "Separator, Space", etc. +// +// If the pattern is invalid (not valid even in JavaScript), then this function +// returns the empty string and an error. +// +// If the pattern is valid, but incompatible (contains a lookahead or backreference), +// then this function returns the transformation (a non-empty string) AND an error. +func TransformRegExp(pattern string) (string, error) { + + if pattern == "" { + return "", nil + } + + // TODO If without \, if without (?=, (?!, then another shortcut + + parser := _RegExp_parser{ + str: pattern, + length: len(pattern), + goRegexp: bytes.NewBuffer(make([]byte, 0, 3*len(pattern)/2)), + } + parser.read() // Pull in the first character + parser.scan() + var err error + if len(parser.errors) > 0 { + err = parser.errors[0] + } + if parser.invalid { + return "", err + } + + // Might not be re2 compatible, but is still a valid JavaScript RegExp + return parser.goRegexp.String(), err +} + +func (self *_RegExp_parser) scan() { + for self.chr != -1 { + switch self.chr { + case '\\': + self.read() + self.scanEscape(false) + case '(': + self.pass() + self.scanGroup() + case '[': + self.pass() + self.scanBracket() + case ')': + self.error(-1, "Unmatched ')'") + self.invalid = true + self.pass() + default: + self.pass() + } + } +} + +// (...) +func (self *_RegExp_parser) scanGroup() { + str := self.str[self.chrOffset:] + if len(str) > 1 { // A possibility of (?= or (?! + if str[0] == '?' { + if str[1] == '=' || str[1] == '!' 
{ + self.error(-1, "re2: Invalid (%s) <lookahead>", self.str[self.chrOffset:self.chrOffset+2]) + } + } + } + for self.chr != -1 && self.chr != ')' { + switch self.chr { + case '\\': + self.read() + self.scanEscape(false) + case '(': + self.pass() + self.scanGroup() + case '[': + self.pass() + self.scanBracket() + default: + self.pass() + continue + } + } + if self.chr != ')' { + self.error(-1, "Unterminated group") + self.invalid = true + return + } + self.pass() +} + +// [...] +func (self *_RegExp_parser) scanBracket() { + for self.chr != -1 { + if self.chr == ']' { + break + } else if self.chr == '\\' { + self.read() + self.scanEscape(true) + continue + } + self.pass() + } + if self.chr != ']' { + self.error(-1, "Unterminated character class") + self.invalid = true + return + } + self.pass() +} + +// \... +func (self *_RegExp_parser) scanEscape(inClass bool) { + offset := self.chrOffset + + var length, base uint32 + switch self.chr { + + case '0', '1', '2', '3', '4', '5', '6', '7': + var value int64 + size := 0 + for { + digit := int64(digitValue(self.chr)) + if digit >= 8 { + // Not a valid digit + break + } + value = value*8 + digit + self.read() + size += 1 + } + if size == 1 { // The number of characters read + _, err := self.goRegexp.Write([]byte{'\\', byte(value) + '0'}) + if err != nil { + self.errors = append(self.errors, err) + } + if value != 0 { + // An invalid backreference + self.error(-1, "re2: Invalid \\%d <backreference>", value) + } + return + } + tmp := []byte{'\\', 'x', '0', 0} + if value >= 16 { + tmp = tmp[0:2] + } else { + tmp = tmp[0:3] + } + tmp = strconv.AppendInt(tmp, value, 16) + _, err := self.goRegexp.Write(tmp) + if err != nil { + self.errors = append(self.errors, err) + } + return + + case '8', '9': + size := 0 + for { + digit := digitValue(self.chr) + if digit >= 10 { + // Not a valid digit + break + } + self.read() + size += 1 + } + err := self.goRegexp.WriteByte('\\') + if err != nil { + self.errors = append(self.errors, err) + } 
+ _, err = self.goRegexp.WriteString(self.str[offset:self.chrOffset]) + if err != nil { + self.errors = append(self.errors, err) + } + self.error(-1, "re2: Invalid \\%s <backreference>", self.str[offset:self.chrOffset]) + return + + case 'x': + self.read() + length, base = 2, 16 + + case 'u': + self.read() + length, base = 4, 16 + + case 'b': + if inClass { + _, err := self.goRegexp.Write([]byte{'\\', 'x', '0', '8'}) + if err != nil { + self.errors = append(self.errors, err) + } + self.read() + return + } + fallthrough + + case 'B': + fallthrough + + case 'd', 'D', 's', 'S', 'w', 'W': + // This is slightly broken, because ECMAScript + // includes \v in \s, \S, while re2 does not + fallthrough + + case '\\': + fallthrough + + case 'f', 'n', 'r', 't', 'v': + err := self.goRegexp.WriteByte('\\') + if err != nil { + self.errors = append(self.errors, err) + } + self.pass() + return + + case 'c': + self.read() + var value int64 + if 'a' <= self.chr && self.chr <= 'z' { + value = int64(self.chr) - 'a' + 1 + } else if 'A' <= self.chr && self.chr <= 'Z' { + value = int64(self.chr) - 'A' + 1 + } else { + err := self.goRegexp.WriteByte('c') + if err != nil { + self.errors = append(self.errors, err) + } + return + } + tmp := []byte{'\\', 'x', '0', 0} + if value >= 16 { + tmp = tmp[0:2] + } else { + tmp = tmp[0:3] + } + tmp = strconv.AppendInt(tmp, value, 16) + _, err := self.goRegexp.Write(tmp) + if err != nil { + self.errors = append(self.errors, err) + } + self.read() + return + + default: + // $ is an identifier character, so we have to have + // a special case for it here + if self.chr == '$' || !isIdentifierPart(self.chr) { + // A non-identifier character needs escaping + err := self.goRegexp.WriteByte('\\') + if err != nil { + self.errors = append(self.errors, err) + } + } else { + // Unescape the character for re2 + } + self.pass() + return + } + + // Otherwise, we're a \u.... or \x... 
+ valueOffset := self.chrOffset + + var value uint32 + { + length := length + for ; length > 0; length-- { + digit := uint32(digitValue(self.chr)) + if digit >= base { + // Not a valid digit + goto skip + } + value = value*base + digit + self.read() + } + } + + if length == 4 { + _, err := self.goRegexp.Write([]byte{ + '\\', + 'x', + '{', + self.str[valueOffset+0], + self.str[valueOffset+1], + self.str[valueOffset+2], + self.str[valueOffset+3], + '}', + }) + if err != nil { + self.errors = append(self.errors, err) + } + } else if length == 2 { + _, err := self.goRegexp.Write([]byte{ + '\\', + 'x', + self.str[valueOffset+0], + self.str[valueOffset+1], + }) + if err != nil { + self.errors = append(self.errors, err) + } + } else { + // Should never, ever get here... + self.error(-1, "re2: Illegal branch in scanEscape") + goto skip + } + + return + +skip: + _, err := self.goRegexp.WriteString(self.str[offset:self.chrOffset]) + if err != nil { + self.errors = append(self.errors, err) + } +} + +func (self *_RegExp_parser) pass() { + if self.chr != -1 { + _, err := self.goRegexp.WriteRune(self.chr) + if err != nil { + self.errors = append(self.errors, err) + } + } + self.read() +} + +// TODO Better error reporting, use the offset, etc. +func (self *_RegExp_parser) error(offset int, msg string, msgValues ...interface{}) error { + err := fmt.Errorf(msg, msgValues...) 
+ self.errors = append(self.errors, err) + return err +} diff --git a/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/regexp_test.go b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/regexp_test.go new file mode 100644 index 000000000..3222db1a7 --- /dev/null +++ b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/regexp_test.go @@ -0,0 +1,149 @@ +package parser + +import ( + "regexp" + "testing" +) + +func TestRegExp(t *testing.T) { + tt(t, func() { + { + // err + test := func(input string, expect interface{}) { + _, err := TransformRegExp(input) + is(err, expect) + } + + test("[", "Unterminated character class") + + test("(", "Unterminated group") + + test("(?=)", "re2: Invalid (?=) <lookahead>") + + test("(?=)", "re2: Invalid (?=) <lookahead>") + + test("(?!)", "re2: Invalid (?!) <lookahead>") + + // An error anyway + test("(?=", "re2: Invalid (?=) <lookahead>") + + test("\\1", "re2: Invalid \\1 <backreference>") + + test("\\90", "re2: Invalid \\90 <backreference>") + + test("\\9123456789", "re2: Invalid \\9123456789 <backreference>") + + test("\\(?=)", "Unmatched ')'") + + test(")", "Unmatched ')'") + } + + { + // err + test := func(input, expect string, expectErr interface{}) { + output, err := TransformRegExp(input) + is(output, expect) + is(err, expectErr) + } + + test("(?!)", "(?!)", "re2: Invalid (?!) <lookahead>") + + test(")", "", "Unmatched ')'") + + test("(?!))", "", "re2: Invalid (?!) 
<lookahead>") + + test("\\0", "\\0", nil) + + test("\\1", "\\1", "re2: Invalid \\1 <backreference>") + + test("\\9123456789", "\\9123456789", "re2: Invalid \\9123456789 <backreference>") + } + + { + // err + test := func(input string, expect string) { + result, err := TransformRegExp(input) + is(err, nil) + if is(result, expect) { + _, err := regexp.Compile(result) + if !is(err, nil) { + t.Log(result) + } + } + } + + test("", "") + + test("abc", "abc") + + test(`\abc`, `abc`) + + test(`\a\b\c`, `a\bc`) + + test(`\x`, `x`) + + test(`\c`, `c`) + + test(`\cA`, `\x01`) + + test(`\cz`, `\x1a`) + + test(`\ca`, `\x01`) + + test(`\cj`, `\x0a`) + + test(`\ck`, `\x0b`) + + test(`\+`, `\+`) + + test(`[\b]`, `[\x08]`) + + test(`\u0z01\x\undefined`, `u0z01xundefined`) + + test(`\\|'|\r|\n|\t|\u2028|\u2029`, `\\|'|\r|\n|\t|\x{2028}|\x{2029}`) + + test("]", "]") + + test("}", "}") + + test("%", "%") + + test("(%)", "(%)") + + test("(?:[%\\s])", "(?:[%\\s])") + + test("[[]", "[[]") + + test("\\101", "\\x41") + + test("\\51", "\\x29") + + test("\\051", "\\x29") + + test("\\175", "\\x7d") + + test("\\04", "\\x04") + + test(`<%([\s\S]+?)%>`, `<%([\s\S]+?)%>`) + + test(`(.)^`, "(.)^") + + test(`<%-([\s\S]+?)%>|<%=([\s\S]+?)%>|<%([\s\S]+?)%>|$`, `<%-([\s\S]+?)%>|<%=([\s\S]+?)%>|<%([\s\S]+?)%>|$`) + + test(`\$`, `\$`) + + test(`[G-b]`, `[G-b]`) + + test(`[G-b\0]`, `[G-b\0]`) + } + }) +} + +func TestTransformRegExp(t *testing.T) { + tt(t, func() { + pattern, err := TransformRegExp(`\s+abc\s+`) + is(err, nil) + is(pattern, `\s+abc\s+`) + is(regexp.MustCompile(pattern).MatchString("\t abc def"), true) + }) +} diff --git a/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/scope.go b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/scope.go new file mode 100644 index 000000000..e1dbdda13 --- /dev/null +++ b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/scope.go @@ -0,0 +1,44 @@ +package parser + +import ( + "github.com/robertkrimen/otto/ast" +) + +type _scope 
struct { + outer *_scope + allowIn bool + inIteration bool + inSwitch bool + inFunction bool + declarationList []ast.Declaration + + labels []string +} + +func (self *_parser) openScope() { + self.scope = &_scope{ + outer: self.scope, + allowIn: true, + } +} + +func (self *_parser) closeScope() { + self.scope = self.scope.outer +} + +func (self *_scope) declare(declaration ast.Declaration) { + self.declarationList = append(self.declarationList, declaration) +} + +func (self *_scope) hasLabel(name string) bool { + for _, label := range self.labels { + if label == name { + return true + } + } + if self.outer != nil && !self.inFunction { + // Crossing a function boundary to look for a label is verboten + return self.outer.hasLabel(name) + } + return false +} diff --git a/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/statement.go b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/statement.go new file mode 100644 index 000000000..2059d3856 --- /dev/null +++ b/Godeps/_workspace/src/github.com/robertkrimen/otto/parser/statement.go @@ -0,0 +1,663 @@ +package parser + +import ( + "github.com/robertkrimen/otto/ast" + "github.com/robertkrimen/otto/token" +) + +func (self *_parser) parseBlockStatement() *ast.BlockStatement { + node := &ast.BlockStatement{} + node.LeftBrace = self.expect(token.LEFT_BRACE) + node.List = self.parseStatementList() + node.RightBrace = self.expect(token.RIGHT_BRACE) + + return node +} + +func (self *_parser) parseEmptyStatement() ast.Statement { + idx := self.expect(token.SEMICOLON) + return &ast.EmptyStatement{Semicolon: idx} +} + +func (self *_parser) parseStatementList() (list []ast.Statement) { + for self.token != token.RIGHT_BRACE && self.token != token.EOF { + list = append(list, self.parseStatement()) + } + + return +} + +func (self *_parser) parseStatement() ast.Statement { + + if self.token == token.EOF { + self.errorUnexpectedToken(self.token) + return &ast.BadStatement{From: self.idx, To: self.idx + 1} + } + + switch 
self.token { + case token.SEMICOLON: + return self.parseEmptyStatement() + case token.LEFT_BRACE: + return self.parseBlockStatement() + case token.IF: + return self.parseIfStatement() + case token.DO: + return self.parseDoWhileStatement() + case token.WHILE: + return self.parseWhileStatement() + case token.FOR: + return self.parseForOrForInStatement() + case token.BREAK: + return self.parseBreakStatement() + case token.CONTINUE: + return self.parseContinueStatement() + case token.DEBUGGER: + return self.parseDebuggerStatement() + case token.WITH: + return self.parseWithStatement() + case token.VAR: + return self.parseVariableStatement() + case token.FUNCTION: + self.parseFunction(true) + // FIXME + return &ast.EmptyStatement{} + case token.SWITCH: + return self.parseSwitchStatement() + case token.RETURN: + return self.parseReturnStatement() + case token.THROW: + return self.parseThrowStatement() + case token.TRY: + return self.parseTryStatement() + } + + expression := self.parseExpression() + + if identifier, isIdentifier := expression.(*ast.Identifier); isIdentifier && self.token == token.COLON { + // LabelledStatement + colon := self.idx + self.next() // : + label := identifier.Name + for _, value := range self.scope.labels { + if label == value { + self.error(identifier.Idx0(), "Label '%s' already exists", label) + } + } + self.scope.labels = append(self.scope.labels, label) // Push the label + statement := self.parseStatement() + self.scope.labels = self.scope.labels[:len(self.scope.labels)-1] // Pop the label + return &ast.LabelledStatement{ + Label: identifier, + Colon: colon, + Statement: statement, + } + } + + self.optionalSemicolon() + + return &ast.ExpressionStatement{ + Expression: expression, + } +} + +func (self *_parser) parseTryStatement() ast.Statement { + + node := &ast.TryStatement{ + Try: self.expect(token.TRY), + Body: self.parseBlockStatement(), + } + + if self.token == token.CATCH { + catch := self.idx + self.next() + 
self.expect(token.LEFT_PARENTHESIS) + if self.token != token.IDENTIFIER { + self.expect(token.IDENTIFIER) + self.nextStatement() + return &ast.BadStatement{From: catch, To: self.idx} + } else { + identifier := self.parseIdentifier() + self.expect(token.RIGHT_PARENTHESIS) + node.Catch = &ast.CatchStatement{ + Catch: catch, + Parameter: identifier, + Body: self.parseBlockStatement(), + } + } + } + + if self.token == token.FINALLY { + self.next() + node.Finally = self.parseBlockStatement() + } + + if node.Catch == nil && node.Finally == nil { + self.error(node.Try, "Missing catch or finally after try") + return &ast.BadStatement{From: node.Try, To: node.Body.Idx1()} + } + + return node +} + +func (self *_parser) parseFunctionParameterList() *ast.ParameterList { + opening := self.expect(token.LEFT_PARENTHESIS) + var list []*ast.Identifier + for self.token != token.RIGHT_PARENTHESIS && self.token != token.EOF { + if self.token != token.IDENTIFIER { + self.expect(token.IDENTIFIER) + } else { + list = append(list, self.parseIdentifier()) + } + if self.token != token.RIGHT_PARENTHESIS { + self.expect(token.COMMA) + } + } + closing := self.expect(token.RIGHT_PARENTHESIS) + + return &ast.ParameterList{ + Opening: opening, + List: list, + Closing: closing, + } +} + +func (self *_parser) parseParameterList() (list []string) { + for self.token != token.EOF { + if self.token != token.IDENTIFIER { + self.expect(token.IDENTIFIER) + } + list = append(list, self.literal) + self.next() + if self.token != token.EOF { + self.expect(token.COMMA) + } + } + return +} + +func (self *_parser) parseFunction(declaration bool) *ast.FunctionLiteral { + + node := &ast.FunctionLiteral{ + Function: self.expect(token.FUNCTION), + } + + var name *ast.Identifier + if self.token == token.IDENTIFIER { + name = self.parseIdentifier() + if declaration { + self.scope.declare(&ast.FunctionDeclaration{ + Function: node, + }) + } + } else if declaration { + // Use expect error handling + 
self.expect(token.IDENTIFIER) + } + node.Name = name + node.ParameterList = self.parseFunctionParameterList() + self.parseFunctionBlock(node) + node.Source = self.slice(node.Idx0(), node.Idx1()) + + return node +} + +func (self *_parser) parseFunctionBlock(node *ast.FunctionLiteral) { + { + self.openScope() + inFunction := self.scope.inFunction + self.scope.inFunction = true + defer func() { + self.scope.inFunction = inFunction + self.closeScope() + }() + node.Body = self.parseBlockStatement() + node.DeclarationList = self.scope.declarationList + } +} + +func (self *_parser) parseDebuggerStatement() ast.Statement { + idx := self.expect(token.DEBUGGER) + + node := &ast.DebuggerStatement{ + Debugger: idx, + } + + self.semicolon() + + return node +} + +func (self *_parser) parseReturnStatement() ast.Statement { + idx := self.expect(token.RETURN) + + if !self.scope.inFunction { + self.error(idx, "Illegal return statement") + self.nextStatement() + return &ast.BadStatement{From: idx, To: self.idx} + } + + node := &ast.ReturnStatement{ + Return: idx, + } + + if !self.implicitSemicolon && self.token != token.SEMICOLON && self.token != token.RIGHT_BRACE && self.token != token.EOF { + node.Argument = self.parseExpression() + } + + self.semicolon() + + return node +} + +func (self *_parser) parseThrowStatement() ast.Statement { + idx := self.expect(token.THROW) + + if self.implicitSemicolon { + if self.chr == -1 { // Hackish + self.error(idx, "Unexpected end of input") + } else { + self.error(idx, "Illegal newline after throw") + } + self.nextStatement() + return &ast.BadStatement{From: idx, To: self.idx} + } + + node := &ast.ThrowStatement{ + Argument: self.parseExpression(), + } + + self.semicolon() + + return node +} + +func (self *_parser) parseSwitchStatement() ast.Statement { + self.expect(token.SWITCH) + self.expect(token.LEFT_PARENTHESIS) + node := &ast.SwitchStatement{ + Discriminant: self.parseExpression(), + Default: -1, + } + self.expect(token.RIGHT_PARENTHESIS) 
+ + self.expect(token.LEFT_BRACE) + + inSwitch := self.scope.inSwitch + self.scope.inSwitch = true + defer func() { + self.scope.inSwitch = inSwitch + }() + + for index := 0; self.token != token.EOF; index++ { + if self.token == token.RIGHT_BRACE { + self.next() + break + } + + clause := self.parseCaseStatement() + if clause.Test == nil { + if node.Default != -1 { + self.error(clause.Case, "Already saw a default in switch") + } + node.Default = index + } + node.Body = append(node.Body, clause) + } + + return node +} + +func (self *_parser) parseWithStatement() ast.Statement { + self.expect(token.WITH) + self.expect(token.LEFT_PARENTHESIS) + node := &ast.WithStatement{ + Object: self.parseExpression(), + } + self.expect(token.RIGHT_PARENTHESIS) + + node.Body = self.parseStatement() + + return node +} + +func (self *_parser) parseCaseStatement() *ast.CaseStatement { + + node := &ast.CaseStatement{ + Case: self.idx, + } + if self.token == token.DEFAULT { + self.next() + } else { + self.expect(token.CASE) + node.Test = self.parseExpression() + } + self.expect(token.COLON) + + for { + if self.token == token.EOF || + self.token == token.RIGHT_BRACE || + self.token == token.CASE || + self.token == token.DEFAULT { + break + } + node.Consequent = append(node.Consequent, self.parseStatement()) + + } + + return node +} + +func (self *_parser) parseIterationStatement() ast.Statement { + inIteration := self.scope.inIteration + self.scope.inIteration = true + defer func() { + self.scope.inIteration = inIteration + }() + return self.parseStatement() +} + +func (self *_parser) parseForIn(into ast.Expression) *ast.ForInStatement { + + // Already have consumed "<into> in" + + source := self.parseExpression() + self.expect(token.RIGHT_PARENTHESIS) + + return &ast.ForInStatement{ + Into: into, + Source: source, + Body: self.parseIterationStatement(), + } +} + +func (self *_parser) parseFor(initializer ast.Expression) *ast.ForStatement { + + // Already have consumed "<initializer> ;" + 
+ var test, update ast.Expression + + if self.token != token.SEMICOLON { + test = self.parseExpression() + } + self.expect(token.SEMICOLON) + + if self.token != token.RIGHT_PARENTHESIS { + update = self.parseExpression() + } + self.expect(token.RIGHT_PARENTHESIS) + + return &ast.ForStatement{ + Initializer: initializer, + Test: test, + Update: update, + Body: self.parseIterationStatement(), + } +} + +func (self *_parser) parseForOrForInStatement() ast.Statement { + idx := self.expect(token.FOR) + self.expect(token.LEFT_PARENTHESIS) + + var left []ast.Expression + + forIn := false + if self.token != token.SEMICOLON { + + allowIn := self.scope.allowIn + self.scope.allowIn = false + if self.token == token.VAR { + var_ := self.idx + self.next() + list := self.parseVariableDeclarationList(var_) + if len(list) == 1 && self.token == token.IN { + self.next() // in + forIn = true + left = []ast.Expression{list[0]} // There is only one declaration + } else { + left = list + } + } else { + left = append(left, self.parseExpression()) + if self.token == token.IN { + self.next() + forIn = true + } + } + self.scope.allowIn = allowIn + } + + if forIn { + switch left[0].(type) { + case *ast.Identifier, *ast.DotExpression, *ast.BracketExpression, *ast.VariableExpression: + // These are all acceptable + default: + self.error(idx, "Invalid left-hand side in for-in") + self.nextStatement() + return &ast.BadStatement{From: idx, To: self.idx} + } + return self.parseForIn(left[0]) + } + + self.expect(token.SEMICOLON) + return self.parseFor(&ast.SequenceExpression{Sequence: left}) +} + +func (self *_parser) parseVariableStatement() *ast.VariableStatement { + + idx := self.expect(token.VAR) + + list := self.parseVariableDeclarationList(idx) + self.semicolon() + + return &ast.VariableStatement{ + Var: idx, + List: list, + } +} + +func (self *_parser) parseDoWhileStatement() ast.Statement { + inIteration := self.scope.inIteration + self.scope.inIteration = true + defer func() { + 
self.scope.inIteration = inIteration + }() + + self.expect(token.DO) + node := &ast.DoWhileStatement{} + if self.token == token.LEFT_BRACE { + node.Body = self.parseBlockStatement() + } else { + node.Body = self.parseStatement() + } + + self.expect(token.WHILE) + self.expect(token.LEFT_PARENTHESIS) + node.Test = self.parseExpression() + self.expect(token.RIGHT_PARENTHESIS) + + return node +} + +func (self *_parser) parseWhileStatement() ast.Statement { + self.expect(token.WHILE) + self.expect(token.LEFT_PARENTHESIS) + node := &ast.WhileStatement{ + Test: self.parseExpression(), + } + self.expect(token.RIGHT_PARENTHESIS) + node.Body = self.parseIterationStatement() + + return node +} + +func (self *_parser) parseIfStatement() ast.Statement { + self.expect(token.IF) + self.expect(token.LEFT_PARENTHESIS) + node := &ast.IfStatement{ + Test: self.parseExpression(), + } + self.expect(token.RIGHT_PARENTHESIS) + + if self.token == token.LEFT_BRACE { + node.Consequent = self.parseBlockStatement() + } else { + node.Consequent = self.parseStatement() + } + + if self.token == token.ELSE { + self.next() + node.Alternate = self.parseStatement() + } + + return node +} + +func (self *_parser) parseSourceElement() ast.Statement { + return self.parseStatement() +} + +func (self *_parser) parseSourceElements() []ast.Statement { + body := []ast.Statement(nil) + + for { + if self.token != token.STRING { + break + } + + body = append(body, self.parseSourceElement()) + } + + for self.token != token.EOF { + body = append(body, self.parseSourceElement()) + } + + return body +} + +func (self *_parser) parseProgram() *ast.Program { + self.openScope() + defer self.closeScope() + return &ast.Program{ + Body: self.parseSourceElements(), + DeclarationList: self.scope.declarationList, + File: self.file, + } +} + +func (self *_parser) parseBreakStatement() ast.Statement { + idx := self.expect(token.BREAK) + semicolon := self.implicitSemicolon + if self.token == token.SEMICOLON { + semicolon = true 
+ self.next() + } + + if semicolon || self.token == token.RIGHT_BRACE { + self.implicitSemicolon = false + if !self.scope.inIteration && !self.scope.inSwitch { + goto illegal + } + return &ast.BranchStatement{ + Idx: idx, + Token: token.BREAK, + } + } + + if self.token == token.IDENTIFIER { + identifier := self.parseIdentifier() + if !self.scope.hasLabel(identifier.Name) { + self.error(idx, "Undefined label '%s'", identifier.Name) + return &ast.BadStatement{From: idx, To: identifier.Idx1()} + } + self.semicolon() + return &ast.BranchStatement{ + Idx: idx, + Token: token.BREAK, + Label: identifier, + } + } + + self.expect(token.IDENTIFIER) + +illegal: + self.error(idx, "Illegal break statement") + self.nextStatement() + return &ast.BadStatement{From: idx, To: self.idx} +} + +func (self *_parser) parseContinueStatement() ast.Statement { + idx := self.expect(token.CONTINUE) + semicolon := self.implicitSemicolon + if self.token == token.SEMICOLON { + semicolon = true + self.next() + } + + if semicolon || self.token == token.RIGHT_BRACE { + self.implicitSemicolon = false + if !self.scope.inIteration { + goto illegal + } + return &ast.BranchStatement{ + Idx: idx, + Token: token.CONTINUE, + } + } + + if self.token == token.IDENTIFIER { + identifier := self.parseIdentifier() + if !self.scope.hasLabel(identifier.Name) { + self.error(idx, "Undefined label '%s'", identifier.Name) + return &ast.BadStatement{From: idx, To: identifier.Idx1()} + } + if !self.scope.inIteration { + goto illegal + } + self.semicolon() + return &ast.BranchStatement{ + Idx: idx, + Token: token.CONTINUE, + Label: identifier, + } + } + + self.expect(token.IDENTIFIER) + +illegal: + self.error(idx, "Illegal continue statement") + self.nextStatement() + return &ast.BadStatement{From: idx, To: self.idx} +} + +// Find the next statement after an error (recover) +func (self *_parser) nextStatement() { + for { + switch self.token { + case token.BREAK, token.CONTINUE, + token.FOR, token.IF, token.RETURN, 
token.SWITCH, + token.VAR, token.DO, token.TRY, token.WITH, + token.WHILE, token.THROW, token.CATCH, token.FINALLY: + // Return only if parser made some progress since last + // sync or if it has not reached 10 next calls without + // progress. Otherwise consume at least one token to + // avoid an endless parser loop + if self.idx == self.recover.idx && self.recover.count < 10 { + self.recover.count++ + return + } + if self.idx > self.recover.idx { + self.recover.idx = self.idx + self.recover.count = 0 + return + } + // Reaching here indicates a parser bug, likely an + // incorrect token list in this function, but it only + // leads to skipping of possibly correct code if a + // previous error is present, and thus is preferred + // over a non-terminating parse. + case token.EOF: + return + } + self.next() + } +} |