Compare commits

...

5 Commits

Author SHA1 Message Date
Yury Kurlykov c09b52638c
Add ESTree link in README.md 2020-04-27 23:13:42 +10:00
Yury Kurlykov 7f68959590
Make a huge load of changes
- Remove visitors
- Add logging
- Add file read
- Make ESTree nodes work
- Move JS input streams to its own module
- Implement parse tree listeners (WIP)
- Add custom error listener (WIP)
2020-04-27 23:07:15 +10:00
Yury Kurlykov 0a85994fae
Comment out throw keyword
Exceptions are not going to be implemented soon.
2020-04-27 22:57:54 +10:00
Yury Kurlykov dc7502aa57
Implement ES5 part of JavaScript AST 2020-04-26 23:10:26 +10:00
Yury Kurlykov 124d299d23
Postpone template strings and for/of support 2020-04-26 21:21:03 +10:00
13 changed files with 1429 additions and 44 deletions

View File

@ -22,12 +22,15 @@ pip install -r requirements.txt # Use requirements-dev.txt if you want to run te
## Running
```bash
antlr4 -o jasminesnake/lex -package lex -Dlanguage=Python3 grammars/*.g4
antlr4 -Xexact-output-dir -o jasminesnake/lex -package lex -Dlanguage=Python3 -listener grammars/*.g4
python -m jasminesnake
```
## Credits
ESTree specification:
[https://github.com/estree/estree/](https://github.com/estree/estree/)
JavaScript grammar source:
[https://github.com/antlr/grammars-v4/tree/master/javascript/javascript](https://github.com/antlr/grammars-v4/tree/master/javascript/javascript)

View File

@ -139,7 +139,6 @@ Function: 'function';
This: 'this';
Default: 'default';
If: 'if';
Throw: 'throw';
Delete: 'delete';
In: 'in';
As: 'as';
@ -155,6 +154,7 @@ From: 'from';
//Debugger: 'debugger';
//With: 'with';
//Try: 'try';
//Throw: 'throw';
/// Future Reserved Words
@ -192,7 +192,7 @@ StringLiteral: ('"' DoubleStringCharacter* '"'
;
// TODO: `${`tmp`}`
TemplateStringLiteral: '`' ('\\`' | ~'`')* '`';
//TemplateStringLiteral: '`' ('\\`' | ~'`')* '`';
WhiteSpaces: [\t\u000B\u000C\u0020\u00A0]+ -> channel(HIDDEN);

View File

@ -150,7 +150,7 @@ iterationStatement
| For '(' (expressionSequence | variableDeclarationList)? ';' expressionSequence? ';' expressionSequence? ')' statement # ForStatement
| For '(' (singleExpression | variableDeclarationList) In expressionSequence ')' statement # ForInStatement
// Strange: 'of' is an identifier, and the self.p("of") predicate does not always work.
| For /*Await?*/ '(' (singleExpression | variableDeclarationList) identifier{self.p("of")}? expressionSequence ')' statement # ForOfStatement
// | For /*Await?*/ '(' (singleExpression | variableDeclarationList) identifier{self.p("of")}? expressionSequence ')' statement # ForOfStatement
;
varModifier // let, const - ECMAScript 6
@ -346,7 +346,7 @@ singleExpression
| <assoc=right> singleExpression '=' singleExpression # AssignmentExpression
| <assoc=right> singleExpression assignmentOperator singleExpression # AssignmentOperatorExpression
| Import '(' singleExpression ')' # ImportExpression
| singleExpression TemplateStringLiteral # TemplateStringExpression // ECMAScript 6
// | singleExpression TemplateStringLiteral # TemplateStringExpression // ECMAScript 6
// | yieldStatement # YieldExpression // ECMAScript 6
| This # ThisExpression
| identifier # IdentifierExpression
@ -402,7 +402,7 @@ literal
: NullLiteral
| BooleanLiteral
| StringLiteral
| TemplateStringLiteral
// | TemplateStringLiteral
| RegularExpressionLiteral
| numericLiteral
| bigintLiteral
@ -471,7 +471,7 @@ keyword
// | With
| Default
| If
| Throw
// | Throw
| Delete
| In
// | Try

View File

@ -1,8 +1,13 @@
__version__ = "0.0.1"
__snake__ = """
"""Pylint tells me this module should have a docstring.
So here it is.
"""
import logging
__version__ = "0.0.2"
__snake__ = r"""
_________ _________
/ \ / \\
/ /~~~~~\ \ / /~~~~~\ \\
/ \ / \
/ /~~~~~\ \ / /~~~~~\ \
| | | | | | | |
| | | | | | | |
| | | | | | | | /
@ -12,4 +17,19 @@ __snake__ = """
| ~~~~~~~~~ ~~~~~~~~
^
"""
LOG_LEVELS = {
0: {"level": logging.CRITICAL, "format": u"[%(asctime)s] %(message)s"},
1: {
"level": logging.ERROR,
"format": u"[%(asctime)s] [%(levelname)s] %(message)s",
},
2: {"level": logging.WARN, "format": u"[%(asctime)s] [%(levelname)s] %(message)s"},
3: {"level": logging.INFO, "format": u"[%(asctime)s] [%(levelname)s] %(message)s"},
4: {
"level": logging.DEBUG,
"format": u"[%(asctime)s] [%(levelname)s] %(filename)s:%(lineno)d: %(message)s",
},
}
# TODO: make it usable as a module too

View File

@ -1,34 +1,53 @@
from jasminesnake import __version__, __snake__
from antlr4 import *
from .lex import JavaScriptLexer, JavaScriptParser
"""Pylint tells me this module should have a docstring.
So here it is.
"""
import sys
import argparse
import logging
import colorama
import coloredlogs
from jasminesnake import __version__, __snake__, LOG_LEVELS
from .js_stream import JSBaseStream, JSStringStream, JSFileStream
from .lex.ErrorListeners import LogErrorListener
import ast
arg_parser = argparse.ArgumentParser(
def create_argument_parser():
_arg_parser = argparse.ArgumentParser(
description="Jasmine Snake, another JS interpreter in Python",
epilog="I hope you don't use it, **especially** in production.",
)
arg_parser.add_argument("--snake", action="store_true", help="Print a snake")
args = arg_parser.parse_args()
_arg_parser.add_argument("--snake", action="store_true", help="print a snake")
_arg_parser.add_argument(
"--verbose",
"-v",
action="count",
default=0,
help="be more verbose. up to 4 (-vvvv) could be handled, more are ignored",
)
_arg_parser.add_argument(
"infile",
type=str,
help='JS input file. use "-" to read input from stdin.',
nargs="?",
)
JSL = JavaScriptLexer.JavaScriptLexer
JSP = JavaScriptParser.JavaScriptParser
class WriteTreeListener(ParseTreeListener):
def visitTerminal(self, node: TerminalNode):
print("Visit Terminal: " + str(node) + " - " + repr(node))
return _arg_parser
def main():
# Init colorama
colorama.init()
print("Jasmine Snake v{version}".format(version=__version__))
# Init logging
log_level = min(args.verbose, 4) # Ignore verbosity values more than 4
coloredlogs.install(
level=LOG_LEVELS[log_level]["level"], fmt=LOG_LEVELS[log_level]["format"]
)
# Print the snake if an argument is present
if args.snake:
print(colorama.Style.DIM + __snake__ + colorama.Style.RESET_ALL)
print(
@ -39,22 +58,48 @@ def main():
+ colorama.Fore.RESET
)
# Read JS code from file or stdin
if args.infile is not None:
stream: JSBaseStream
if args.infile == "-":
input_str = sys.stdin.read()
stream = JSStringStream(input_str)
else:
stream = JSFileStream(args.infile, LogErrorListener())
tree = stream.parse()
ast_tree = ast.from_parse_tree(tree)
# TODO: run logic
sys.exit(0)
print("Jasmine Snake v{version}".format(version=__version__))
print(
colorama.Fore.YELLOW
+ "Notice that only single-line statements are supported."
+ colorama.Fore.RESET
)
print()
input_stream = InputStream("var a;\n{a=2+a;}")
lexer = JSL(input_stream)
stream = CommonTokenStream(lexer)
try:
while True:
input_str = input("> ")
logging.debug("Got input %s", input_str)
stream.fill()
for token in stream.tokens:
print("Token: {}".format(str(token)))
stream = JSStringStream(input_str, LogErrorListener())
tree = stream.parse()
logging.debug("Got tree %s", tree.toStringTree(stream.parser.ruleNames))
parser = JSP(stream)
print("Created parsers")
tree = parser.program()
print(tree.toStringTree(parser.ruleNames))
# ParseTreeWalker.DEFAULT.walk(WriteTreeListener(), tree)
ast_tree = ast.from_parse_tree(tree)
# TODO: run logic
except EOFError:
print("Ctrl-D received, shutting down...")
sys.exit(0)
if __name__ == "__main__":
arg_parser = create_argument_parser()
args = arg_parser.parse_args()
main()

View File

@ -0,0 +1,29 @@
"""AST module."""
from antlr4 import ParseTreeWalker
from tree_format import format_tree
import lex.JavaScriptParser as Parser
import ast.nodes
from .parse_tree_listeners import ASTListener
JSP = Parser.JavaScriptParser
def from_parse_tree(tree: JSP.ProgramContext) -> ast.nodes.Program:
    """Build the AST for an ANTLR parse tree.

    A fresh `ASTListener` is driven over `tree` by ANTLR's default tree walker;
    whatever the listener stored in `program_node` is handed back.

    Args:
        tree (JSP.ProgramContext): ANTLR parse tree.

    Returns:
        The listener's `program_node`, i.e. the root `Program` AST node.
    """
    listener = ASTListener()
    walker = ParseTreeWalker.DEFAULT
    walker.walk(listener, tree)
    return listener.program_node
# Delete temporary imports
del JSP
del Parser

988
jasminesnake/ast/nodes.py Normal file
View File

@ -0,0 +1,988 @@
"""The module with AST nodes declaration. They are ESTree compliant.
The module lacks support of:
* ES5 features:
* labelled statements
* switch statements
* try-catch statements
* debugger statement
* with statement
* RegExp
* ES6 features:
* generators/yield statement
* for-of statement
* template literals
* and other ES6 features :)
More about ESTree standard:
https://github.com/estree/estree/
Todo:
* Add support for lacking features
"""
from typing import List, Union, Optional, Literal as TypeLiteral, TypedDict
from enum import Enum
# The Lord sees I actually wanted to split it up, but ESTree hierarchy is so messed up... No. It's actually *fucked up*
# that much that I couldn't even resolve circular dependencies in the submodules. I have to reap what I've sown.
# Custom types used in the nodes
number = Union[int, float]
"""A type union consisting of int and float Python types. Consider it as Number type from JavaScript."""
SourceTypeLiteral = TypeLiteral["script", "module"]
"""The type for the `sourceType` field."""
VarDeclKind = TypeLiteral["var", "let", "const"]
"""The type for the `kind` field of `VariableDeclaration`."""
PropKind = TypeLiteral["init", "get", "set"]
"""A type for a `kind` field of `Property`."""
class UnaryOperator(Enum):
    """Tokens of the operators that take exactly one operand."""

    # Arithmetic sign operators
    MINUS = "-"
    PLUS = "+"
    # Logical and bitwise negation
    NOT_LOGIC = "!"
    NOT_BIT = "~"
    # Keyword operators
    TYPEOF = "typeof"
    VOID = "void"
    DELETE = "delete"
class UpdateOperator(Enum):
    """Tokens of the increment and decrement operators."""

    INCREMENT = "++"  # adds one to the operand
    DECREMENT = "--"  # subtracts one from the operand
class BinaryOperator(Enum):
    """Tokens of the two-operand (non-logical, non-assignment) operators."""

    # Equality
    EQ = "=="
    NEQ = "!="
    EQ_IDENTITY = "==="
    NEQ_IDENTITY = "!=="
    # Relational
    LT = "<"
    LTE = "<="
    GT = ">"
    GTE = ">="
    # Bit shifts
    SHL = "<<"
    SHR = ">>"
    SHR_LOGIC = ">>>"
    # Arithmetic
    ADD = "+"
    SUB = "-"
    MUL = "*"
    DIV = "/"
    MOD = "%"
    # Bitwise
    OR = "|"
    XOR = "^"
    AND = "&"
    # Keyword operators
    IN = "in"
    INSTANCEOF = "instanceof"
class AssignmentOperator(Enum):
    """Tokens of the plain and compound assignment operators."""

    # Plain assignment
    ASSIGN = "="
    # Arithmetic compound assignments
    ADD = "+="
    SUB = "-="
    MUL = "*="
    DIV = "/="
    MOD = "%="
    # Shift compound assignments
    SHL = "<<="
    SHR = ">>="
    SHR_LOGIC = ">>>="
    # Bitwise compound assignments
    OR = "|="
    XOR = "^="
    AND = "&="
class LogicalOperator(Enum):
    """Tokens of the logical operators."""

    OR = "||"  # logical "or"
    AND = "&&"  # logical "and"
# "Node objects" block
#
# These come first so that `Expression` and `Pattern` below can be real `Node`
# subclasses by the time `Identifier` uses them as base classes.


class Position:
    """A line number (1-indexed) together with a column number (0-indexed).

    Raises:
        ValueError: If `line` is below 1 or `column` is below 0.
    """

    def __init__(self, line: int, column: int):
        if line < 1 or column < 0:
            raise ValueError(f"L{line}:C{column} is not valid ESTree position!")

        self.line = line
        self.column = column


class SourceLocation:
    """Source location information of a node.

    Consists of a start position (the position of the first character of the parsed source region) and an end
    position (the position of the first character after the parsed source region).

    See Also:
        Position
    """

    def __init__(self, source: Optional[str], start: Position, end: Position):
        self.source = source
        self.start = start
        self.end = end


class Node:
    """ESTree AST nodes are represented as Node objects, which may have any prototype inheritance but which implement
    this interface.

    The `type` field is a string representing the AST variant type. You can use this field to determine which
    interface a node implements.

    The `loc` field represents the source location information of the node. If the node contains no information about
    the source location, the field is `None`; otherwise it contains a `SourceLocation` object.

    See Also:
        SourceLocation
    """

    def __init__(self, node_type: str, loc: Optional[SourceLocation]):
        self.type = node_type
        self.loc = loc


# Base node kinds that are used as *base classes* before the "Expressions" and
# "Patterns" blocks are reached.  A Python class keeps the base objects it was
# created with, so an empty placeholder here would leave `Identifier` without
# `Node.__init__` and make `Identifier(loc, name)` fail with a TypeError.
#
# NOTE(review): the later `class Expression(Node)` / `class Pattern(Node)`
# statements rebind these names to new class objects; they should be merged
# with the definitions below so that every node shares one `Expression` and
# one `Pattern` class.


class Expression(Node):
    """Any expression node."""

    def __init__(self, node_type: str, loc: Optional[SourceLocation]):
        super().__init__(node_type, loc)


class Pattern(Node):
    """A pattern."""

    def __init__(self, node_type: str, loc: Optional[SourceLocation]):
        super().__init__(node_type, loc)


# Nodes forward declarations.  These names are only needed inside type
# annotations before their real definitions appear further down.


class Directive:
    ...


class Statement:
    ...


class FunctionBody:
    ...


class VariableDeclaration:
    ...


class Property:
    ...


# "Identifier" block


class Identifier(Expression, Pattern):
    """An identifier. Note that an identifier may be an expression or a destructuring pattern.

    Args:
        loc: Source location of the identifier, or `None` when unknown.
        name: The identifier's name.
    """

    def __init__(self, loc: Optional[SourceLocation], name: str):
        super().__init__("Identifier", loc)
        self.name = name
# "Literal" block
class Literal(Expression):
    """A literal token; literals are usable wherever an expression is.

    NOTE(review): construction relies on `Expression` being a `Node`-derived class at the point this class statement
    runs -- verify the definition order in this module.

    Args:
        loc: Source location of the literal, or `None` when unknown.
        value: The literal's value (string, boolean, number or `None`).
    """

    def __init__(
        self, loc: Optional[SourceLocation], value: Union[str, bool, number, None]
    ):
        super().__init__(node_type="Literal", loc=loc)
        self.value = value
# "Programs" block
class Program(Node):
    """Root node of a complete program source tree.

    Args:
        loc: Source location of the program, or `None` when unknown.
        source_type: ``"script"`` or ``"module"``.
        body: The program's directives and statements.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        source_type: SourceTypeLiteral,
        body: List[Union[Directive, Statement]],
    ):
        super().__init__(node_type="Program", loc=loc)
        self.source_type = source_type
        self.body = body
# "Functions" block
class Function(Node):
    """Common base of function declarations and function expressions.

    Args:
        node_type: Value stored in the node's `type` field.
        loc: Source location, or `None` when unknown.
        function_id: The function's name, or `None` for anonymous functions.
        params: The formal parameters.
        body: The function body.

    See Also:
        FunctionDeclaration
        FunctionExpression
        FunctionBody
    """

    def __init__(
        self,
        node_type: str,
        loc: Optional[SourceLocation],
        function_id: Optional[Identifier],
        params: List[Pattern],
        body: FunctionBody,
    ):
        # Positional on purpose: with multiple inheritance the next class in
        # the MRO is not necessarily `Node`.
        super().__init__(node_type, loc)
        self.body = body
        self.params = params
        self.id = function_id
# "Statements" block
class Statement(Node):
    """Base class shared by every statement node."""

    def __init__(self, node_type: str, loc: Optional[SourceLocation]):
        super().__init__(node_type=node_type, loc=loc)
class EmptyStatement(Statement):
    """A statement made of nothing but a semicolon."""

    def __init__(self, loc: Optional[SourceLocation]):
        super().__init__(node_type="EmptyStatement", loc=loc)
class BlockStatement(Statement):
    """A brace-delimited sequence of statements.

    Args:
        loc: Source location, or `None` when unknown.
        body: The statements inside the braces.
    """

    def __init__(self, loc: Optional[SourceLocation], body: List[Statement]):
        super().__init__(node_type="BlockStatement", loc=loc)
        self.body = body
class ExpressionStatement(Statement):
    """A statement that consists of exactly one expression.

    Args:
        loc: Source location, or `None` when unknown.
        expression: The wrapped expression.
    """

    def __init__(self, loc: Optional[SourceLocation], expression: Expression):
        super().__init__(node_type="ExpressionStatement", loc=loc)
        self.expression = expression
class Directive(Node):
    """A directive from the directive prologue of a script or function.

    NOTE(review): the `type` field is set to ``"Directive"``; confirm against the ESTree specification whether
    directives are expected to report ``"ExpressionStatement"`` instead.

    Args:
        loc: Source location, or `None` when unknown.
        expression: The literal forming the directive.
        directive: Raw string source of the directive, without quotes.
    """

    def __init__(
        self, loc: Optional[SourceLocation], expression: Literal, directive: str
    ):
        super().__init__(node_type="Directive", loc=loc)
        self.directive = directive
        self.expression = expression
class FunctionBody(BlockStatement):
    """A function's body: a block statement that is allowed to start with directives.

    The node keeps the ``"BlockStatement"`` type set by the parent class.
    """

    def __init__(
        self, loc: Optional[SourceLocation], body: List[Union[Directive, Statement]]
    ):
        super().__init__(loc=loc, body=body)
class ReturnStatement(Statement):
    """A `return` statement.

    Args:
        loc: Source location, or `None` when unknown.
        argument: The returned expression, or `None` for a bare `return`.
    """

    def __init__(self, loc: Optional[SourceLocation], argument: Optional[Expression]):
        super().__init__(node_type="ReturnStatement", loc=loc)
        self.argument = argument
class BreakStatement(Statement):
    """A `break` statement.

    Args:
        loc: Source location, or `None` when unknown.
        label: The target label, or `None` when there is none.
    """

    def __init__(self, loc: Optional[SourceLocation], label: Optional[Identifier]):
        super().__init__(node_type="BreakStatement", loc=loc)
        self.label = label
class ContinueStatement(Statement):
    """A `continue` statement.

    Args:
        loc: Source location, or `None` when unknown.
        label: The target label, or `None` when there is none.
    """

    def __init__(self, loc: Optional[SourceLocation], label: Optional[Identifier]):
        super().__init__(node_type="ContinueStatement", loc=loc)
        self.label = label
class IfStatement(Statement):
    """An `if` statement.

    Args:
        loc: Source location, or `None` when unknown.
        test: The condition.
        consequent: Statement run when the condition holds.
        alternate: The `else` statement, or `None` when absent.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        test: Expression,
        consequent: Statement,
        alternate: Optional[Statement],
    ):
        super().__init__(node_type="IfStatement", loc=loc)
        self.test = test
        self.alternate = alternate
        self.consequent = consequent
class WhileStatement(Statement):
    """A `while` loop.

    Args:
        loc: Source location, or `None` when unknown.
        test: The loop condition.
        body: The loop body.
    """

    def __init__(
        self, loc: Optional[SourceLocation], test: Expression, body: Statement
    ):
        super().__init__(node_type="WhileStatement", loc=loc)
        self.body = body
        self.test = test
class DoWhileStatement(Statement):
    """A `do`/`while` loop.

    Args:
        loc: Source location, or `None` when unknown.
        body: The loop body.
        test: The loop condition.
    """

    def __init__(
        self, loc: Optional[SourceLocation], body: Statement, test: Expression
    ):
        super().__init__(node_type="DoWhileStatement", loc=loc)
        self.test = test
        self.body = body
class ForStatement(Statement):
    """A classic three-clause `for` loop.

    Args:
        loc: Source location, or `None` when unknown.
        init: The initialiser clause, or `None` when omitted.
        test: The condition clause, or `None` when omitted.
        update: The update clause, or `None` when omitted.
        body: The loop body.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        init: Union[VariableDeclaration, Expression, None],
        test: Optional[Expression],
        update: Optional[Expression],
        body: Statement,
    ):
        super().__init__(node_type="ForStatement", loc=loc)
        self.body = body
        self.update = update
        self.test = test
        self.init = init
class ForInStatement(Statement):
    """A `for`/`in` loop.

    Args:
        loc: Source location, or `None` when unknown.
        left: The loop variable (a declaration or a pattern).
        right: The expression being enumerated.
        body: The loop body.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        left: Union[VariableDeclaration, Pattern],
        right: Expression,
        body: Statement,
    ):
        super().__init__(node_type="ForInStatement", loc=loc)
        self.body = body
        self.right = right
        self.left = left
# "Declarations" block
class Declaration(Statement):
    """Base class of declaration nodes.

    Declarations derive from `Statement` because they may appear in any statement context.
    """

    def __init__(self, node_type: str, loc: Optional[SourceLocation]):
        super().__init__(node_type=node_type, loc=loc)
class FunctionDeclaration(Function, Declaration):
    """A function declaration; unlike in the parent `Function`, the `id` cannot be `None`.

    Args:
        loc: Source location, or `None` when unknown.
        function_id: The declared function's name.
        params: The formal parameters.
        body: The function body.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        function_id: Identifier,
        params: List[Pattern],
        body: FunctionBody,
    ):
        super().__init__(
            node_type="FunctionDeclaration",
            loc=loc,
            function_id=function_id,
            params=params,
            body=body,
        )
class VariableDeclarator(Node):
    """A variable declarator: one ``id [= init]`` entry of a variable declaration.

    Args:
        loc: Source location, or `None` when unknown.
        var_id: The binding target.
        init: The initialiser expression, or `None` when the declarator has none.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        var_id: Pattern,
        # Was annotated `Optional[Exception]` -- a typo for `Expression`.
        init: Optional[Expression],
    ):
        super().__init__("VariableDeclarator", loc)
        self.id = var_id
        self.init = init
class VariableDeclaration(Declaration):
    """A variable declaration statement.

    Args:
        loc: Source location, or `None` when unknown.
        kind: ``"var"``, ``"let"`` or ``"const"``.
        declarations: The individual declarators.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        kind: VarDeclKind,
        declarations: List[VariableDeclarator],
    ):
        super().__init__(node_type="VariableDeclaration", loc=loc)
        self.kind = kind
        self.declarations = declarations
# "Expressions" block
class Expression(Node):
    """Base class of every expression node.

    The left-hand side of an assignment may in general be any expression, so an expression can also act as a
    pattern.

    See Also:
        Pattern
    """

    def __init__(self, node_type: str, loc: Optional[SourceLocation]):
        super().__init__(node_type=node_type, loc=loc)
class Super(Node):
    """The ``super`` pseudo-expression."""

    def __init__(self, loc: Optional[SourceLocation]):
        super().__init__(node_type="Super", loc=loc)
class SpreadElement(Node):
    """A spread element, e.g. ``[head, ...iter, tail]`` or ``f(head, ...iter, ...tail)``.

    Args:
        loc: Source location, or `None` when unknown.
        argument: The expression being spread.
    """

    def __init__(self, loc: Optional[SourceLocation], argument: Expression):
        super().__init__(node_type="SpreadElement", loc=loc)
        self.argument = argument
class ThisExpression(Expression):
    """The `this` expression."""

    def __init__(self, loc: Optional[SourceLocation]):
        super().__init__(node_type="ThisExpression", loc=loc)
class ArrayExpression(Expression):
    """An array literal expression.

    Args:
        loc: Source location, or `None` when unknown.
        elements: The array elements; an entry is `None` where the array has a hole, e.g. ``[1,,2]``.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        elements: List[Union[Expression, SpreadElement, None]],
    ):
        super().__init__(node_type="ArrayExpression", loc=loc)
        self.elements = elements
class ObjectExpression(Expression):
    """An object literal expression.

    Args:
        loc: Source location, or `None` when unknown.
        properties: The object's properties.
    """

    def __init__(self, loc: Optional[SourceLocation], properties: List[Property]):
        super().__init__(node_type="ObjectExpression", loc=loc)
        self.properties = properties
class FunctionExpression(Function, Expression):
    """A function used as an expression.

    Args:
        loc: Source location, or `None` when unknown.
        function_id: The function's name, or `None` when anonymous.
        params: The formal parameters.
        body: The function body.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        function_id: Optional[Identifier],
        params: List[Pattern],
        body: FunctionBody,
    ):
        super().__init__(
            node_type="FunctionExpression",
            loc=loc,
            function_id=function_id,
            params=params,
            body=body,
        )
class ArrowFunctionExpression(Function, Expression):
    """A fat arrow function, e.g. ``let foo = (bar) => { /* body */ }``.

    The function `id` is always `None`.

    Args:
        loc: Source location, or `None` when unknown.
        params: The formal parameters.
        body: A function body or a bare expression.
        expression: Stored unchanged in the `expression` field.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        params: List[Pattern],
        body: Union[FunctionBody, Expression],
        expression: bool,
    ):
        super().__init__(
            node_type="ArrowFunctionExpression",
            loc=loc,
            function_id=None,
            params=params,
            body=body,
        )
        self.expression = expression
class UnaryExpression(Expression):
    """An expression built from a unary operator.

    Args:
        loc: Source location, or `None` when unknown.
        operator: The unary operator.
        prefix: Stored unchanged in the `prefix` field.
        argument: The operand.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        operator: UnaryOperator,
        prefix: bool,
        argument: Expression,
    ):
        super().__init__(node_type="UnaryExpression", loc=loc)
        self.argument = argument
        self.prefix = prefix
        self.operator = operator
class UpdateExpression(Expression):
    """An increment or decrement expression.

    Args:
        loc: Source location, or `None` when unknown.
        operator: ``++`` or ``--``.
        argument: The operand being updated.
        prefix: Stored unchanged in the `prefix` field.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        operator: UpdateOperator,
        argument: Expression,
        prefix: bool,
    ):
        super().__init__(node_type="UpdateExpression", loc=loc)
        self.prefix = prefix
        self.argument = argument
        self.operator = operator
class BinaryExpression(Expression):
    """An expression built from a binary operator.

    Args:
        loc: Source location, or `None` when unknown.
        operator: The binary operator.
        left: The left operand.
        right: The right operand.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        operator: BinaryOperator,
        left: Expression,
        right: Expression,
    ):
        super().__init__(node_type="BinaryExpression", loc=loc)
        self.left = left
        self.right = right
        self.operator = operator
class AssignmentExpression(Expression):
    """An expression built from an assignment operator.

    Args:
        loc: Source location, or `None` when unknown.
        operator: The assignment operator.
        left: The assignment target. `Expression` is accepted for backwards compatibility with pre-ES6 code; it
            should be a `Pattern`.
        right: The assigned value.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        operator: AssignmentOperator,
        left: Union[Pattern, Expression],
        right: Expression,
    ):
        super().__init__(node_type="AssignmentExpression", loc=loc)
        self.left = left
        self.right = right
        self.operator = operator
class LogicalExpression(Expression):
    """An expression built from a logical operator.

    Args:
        loc: Source location, or `None` when unknown.
        operator: ``||`` or ``&&``.
        left: The left operand.
        right: The right operand.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        operator: LogicalOperator,
        left: Union[Pattern, Expression],
        right: Expression,
    ):
        super().__init__(node_type="LogicalExpression", loc=loc)
        self.left = left
        self.right = right
        self.operator = operator
class MemberExpression(Expression, Pattern):
    """A member access expression.

    When `computed` is ``True`` the node stands for a computed access (``a[b]``) and `property` is an `Expression`;
    when it is ``False`` the node stands for a static access (``a.b``) and `property` is an `Identifier`.

    Args:
        loc: Source location, or `None` when unknown.
        member_object: The object being accessed; stored in the `object` field.
        member_property: The accessed member; stored in the `property` field.
        computed: Whether the access is computed.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        member_object: Union[Expression, Super],
        member_property: Expression,
        computed: bool,
    ):
        super().__init__(node_type="MemberExpression", loc=loc)
        self.computed = computed
        self.property = member_property
        self.object = member_object
class ConditionalExpression(Expression):
    """A ternary ``?``/``:`` expression.

    Note the parameter order: `alternate` comes before `consequent`.

    Args:
        loc: Source location, or `None` when unknown.
        test: The condition.
        alternate: Stored in the `alternate` field.
        consequent: Stored in the `consequent` field.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        test: Expression,
        alternate: Expression,
        consequent: Expression,
    ):
        super().__init__(node_type="ConditionalExpression", loc=loc)
        self.test = test
        self.consequent = consequent
        self.alternate = alternate
class CallExpression(Expression):
    """A call of a function or method.

    Args:
        loc: Source location, or `None` when unknown.
        callee: The expression being called.
        arguments: The call arguments.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        callee: Union[Expression, Super],
        arguments: List[Union[Expression, SpreadElement]],
    ):
        super().__init__(node_type="CallExpression", loc=loc)
        self.arguments = arguments
        self.callee = callee
class NewExpression(Expression):
    """A ``new`` expression.

    Args:
        loc: Source location, or `None` when unknown.
        callee: The constructor expression.
        arguments: The constructor arguments.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        callee: Expression,
        arguments: List[Union[Expression, SpreadElement]],
    ):
        super().__init__(node_type="NewExpression", loc=loc)
        self.arguments = arguments
        self.callee = callee
class SequenceExpression(Expression):
    """A comma-separated sequence of expressions.

    Args:
        loc: Source location, or `None` when unknown.
        expressions: The expressions of the sequence.
    """

    def __init__(self, loc: Optional[SourceLocation], expressions: List[Expression]):
        super().__init__(node_type="SequenceExpression", loc=loc)
        self.expressions = expressions
def _generate_unary_expression(operator: UnaryOperator, docstring: str):
    """Build a `UnaryExpression` subclass bound to a single operator.

    Every generated class passes ``prefix=True``, i.e. all unary expressions are assumed to be prefix.

    Args:
        operator: The operator baked into the generated class.
        docstring: Text installed as the generated class' ``__doc__``.

    Returns:
        The generated class; its constructor takes ``(loc, argument)``.
    """

    class Expr(UnaryExpression):
        __doc__ = docstring

        def __init__(self, loc: Optional[SourceLocation], argument: Expression):
            super().__init__(
                loc=loc, operator=operator, prefix=True, argument=argument
            )

    return Expr
def _generate_update_expression(operator: UpdateOperator, prefix: bool, docstring: str):
    """Build an `UpdateExpression` subclass bound to one operator and one position.

    Args:
        operator: The operator baked into the generated class.
        prefix: The `prefix` value baked into the generated class.
        docstring: Text installed as the generated class' ``__doc__``.

    Returns:
        The generated class; its constructor takes ``(loc, argument)``.
    """

    class Expr(UpdateExpression):
        __doc__ = docstring

        def __init__(self, loc: Optional[SourceLocation], argument: Expression):
            super().__init__(
                loc=loc, operator=operator, argument=argument, prefix=prefix
            )

    return Expr
def _generate_binary_expression(operator: BinaryOperator, docstring: str):
    """Build a `BinaryExpression` subclass bound to a single operator.

    Args:
        operator: The operator baked into the generated class.
        docstring: Text installed as the generated class' ``__doc__``.

    Returns:
        The generated class; its constructor takes ``(loc, left, right)``.
    """

    class Expr(BinaryExpression):
        __doc__ = docstring

        def __init__(
            self, loc: Optional[SourceLocation], left: Expression, right: Expression
        ):
            super().__init__(loc=loc, operator=operator, left=left, right=right)

    return Expr
def _generate_assignment_expression(operator: AssignmentOperator, docstring: str):
    """Build an `AssignmentExpression` subclass bound to a single operator.

    Args:
        operator: The operator baked into the generated class.
        docstring: Text installed as the generated class' ``__doc__``.

    Returns:
        The generated class; its constructor takes ``(loc, left, right)``.
    """

    class Expr(AssignmentExpression):
        __doc__ = docstring

        def __init__(
            self,
            loc: Optional[SourceLocation],
            left: Union[Pattern, Expression],
            right: Expression,
        ):
            super().__init__(loc=loc, operator=operator, left=left, right=right)

    return Expr
def _generate_logical_expression(operator: LogicalOperator, docstring: str):
    """Build a `LogicalExpression` subclass bound to a single operator.

    Args:
        operator: The operator baked into the generated class.
        docstring: Text installed as the generated class' ``__doc__``.

    Returns:
        The generated class; its constructor takes ``(loc, left, right)``.
    """

    class Expr(LogicalExpression):
        __doc__ = docstring

        def __init__(
            self,
            loc: Optional[SourceLocation],
            left: Union[Pattern, Expression],
            right: Expression,
        ):
            super().__init__(loc=loc, operator=operator, left=left, right=right)

    return Expr
UnaryMinusExpression = _generate_unary_expression(
UnaryOperator.MINUS, """A unary minus expression."""
)
UnaryPlusExpression = _generate_unary_expression(
UnaryOperator.PLUS, """A unary plus expression."""
)
UnaryLogicNotExpression = _generate_unary_expression(
UnaryOperator.NOT_LOGIC, """A unary logic "not" expression."""
)
UnaryBitNotExpression = _generate_unary_expression(
UnaryOperator.NOT_BIT, """A unary bit "not" expression."""
)
TypeofExpression = _generate_unary_expression(
UnaryOperator.TYPEOF, """A `typeof` expression."""
)
VoidExpression = _generate_unary_expression(
UnaryOperator.VOID, """A `void` expression."""
)
DeleteExpression = _generate_unary_expression(
UnaryOperator.DELETE, """A `delete` expression."""
)
PreIncrementExpression = _generate_update_expression(
UpdateOperator.INCREMENT, True, """A pre-increment expression."""
)
PostIncrementExpression = _generate_update_expression(
UpdateOperator.INCREMENT, False, """A post-increment expression."""
)
PreDecrementExpression = _generate_update_expression(
UpdateOperator.DECREMENT, True, """A pre-decrement expression."""
)
PostDecrementExpression = _generate_update_expression(
UpdateOperator.DECREMENT, False, """A post-decrement expression."""
)
EqualityExpression = _generate_binary_expression(
BinaryOperator.EQ, """An equality expression."""
)
NotEqualityExpression = _generate_binary_expression(
BinaryOperator.NEQ, """A "not equality" expression."""
)
IdentityEqualityExpression = _generate_binary_expression(
BinaryOperator.EQ_IDENTITY, """An identity equality expression."""
)
NotIdentityEqualityExpression = _generate_binary_expression(
BinaryOperator.NEQ_IDENTITY, """A "not identity equality" expression."""
)
LowerThanRelationExpression = _generate_binary_expression(
BinaryOperator.LT, """A "lower than" expression."""
)
LowerThanEqualRelationExpression = _generate_binary_expression(
BinaryOperator.LTE, """A "lower than or equal" expression."""
)
GreaterThanRelationExpression = _generate_binary_expression(
BinaryOperator.GT, """A "greater than" expression."""
)
GreaterThanEqualRelationExpression = _generate_binary_expression(
BinaryOperator.GTE, """A "greater than or equal" expression."""
)
LeftBitShiftExpression = _generate_binary_expression(
BinaryOperator.SHL, """A "left bit shift" expression."""
)
RightBitShiftExpression = _generate_binary_expression(
BinaryOperator.SHR, """A "right bit shift" expression."""
)
LogicRightBitShiftExpression = _generate_binary_expression(
BinaryOperator.SHR_LOGIC, """A "logical right bit shift" expression."""
)
AddArithmeticExpression = _generate_binary_expression(
BinaryOperator.ADD, """An addition arithmetical expression."""
)
SubArithmeticExpression = _generate_binary_expression(
BinaryOperator.SUB, """A subtraction arithmetical expression."""
)
MulArithmeticExpression = _generate_binary_expression(
BinaryOperator.MUL, """A multiplication arithmetical expression."""
)
DivArithmeticExpression = _generate_binary_expression(
BinaryOperator.DIV, """A division arithmetical expression."""
)
ModArithmeticExpression = _generate_binary_expression(
BinaryOperator.MOD, """A modulo arithmetical expression."""
)
OrBitExpression = _generate_binary_expression(
BinaryOperator.OR, """An "or" bit expression."""
)
XorBitExpression = _generate_binary_expression(
BinaryOperator.XOR, """A "xor" bit expression."""
)
AndBitExpression = _generate_binary_expression(
BinaryOperator.AND, """An "and" bit expression."""
)
InExpression = _generate_binary_expression(BinaryOperator.IN, """An "in" expression.""")
InstanceofExpression = _generate_binary_expression(
BinaryOperator.INSTANCEOF, """An "instanceof" expression."""
)
SimpleAssignExpression = _generate_assignment_expression(
AssignmentOperator.ASSIGN, """An assignment done with operator ``=`` expression."""
)
AddAssignExpression = _generate_assignment_expression(
AssignmentOperator.ADD,
"""An addition assignment done with operator ``+=`` expression.""",
)
SubAssignExpression = _generate_assignment_expression(
AssignmentOperator.SUB,
"""A subtraction assignment done with operator ``-=`` expression.""",
)
MulAssignExpression = _generate_assignment_expression(
AssignmentOperator.MUL,
"""A multiplication assignment done with operator ``*=`` expression.""",
)
# The ``/=`` alias was missing altogether, and the ``%=`` alias was bound to
# `AssignmentOperator.DIV`, so modulo assignments reported the ``/=`` operator.
DivAssignExpression = _generate_assignment_expression(
    AssignmentOperator.DIV,
    """A division assignment done with operator ``/=`` expression.""",
)

ModAssignExpression = _generate_assignment_expression(
    AssignmentOperator.MOD,
    """A modulo assignment done with operator ``%=`` expression.""",
)
ShlAssignExpression = _generate_assignment_expression(
AssignmentOperator.SHL,
"""A left shift assignment done with operator ``<<=`` expression.""",
)
ShrAssignExpression = _generate_assignment_expression(
AssignmentOperator.SHR,
"""A right shift assignment done with operator ``>>=`` expression.""",
)
LogicShrAssignExpression = _generate_assignment_expression(
AssignmentOperator.SHR_LOGIC,
"""A logical right shift assignment done with operator ``>>>=`` expression.""",
)
OrAssignExpression = _generate_assignment_expression(
AssignmentOperator.OR,
"""A "bit or" assignment done with operator ``|=`` expression.""",
)
XorAssignExpression = _generate_assignment_expression(
AssignmentOperator.XOR,
"""A "bit xor" assignment done with operator ``^=`` expression.""",
)
AndAssignExpression = _generate_assignment_expression(
AssignmentOperator.AND,
"""A "bit and" assignment done with operator ``&=`` expression.""",
)
OrLogicExpression = _generate_logical_expression(
LogicalOperator.OR, """An "or" logical expression."""
)
AndLogicExpression = _generate_logical_expression(
LogicalOperator.AND, """An "and" logical expression."""
)
# "Property" block
class Property(Node):
    """A property of an object expression.

    A literal property can have either a string or number as its `value`. Ordinary property initializers have a
    `kind` value ``"init"``; getters and setters have the kind values ``"get"`` and ``"set"``, respectively.

    Args:
        loc: Source location, or `None` when unknown.
        key: The property key.
        value: The property value.
        kind: ``"init"``, ``"get"`` or ``"set"``.
        method: Stored unchanged in the `method` field.
        shorthand: Stored unchanged in the `shorthand` field.
        computed: Stored unchanged in the `computed` field.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        key: Union[Literal, Identifier],
        value: Expression,
        kind: PropKind,
        method: bool,
        shorthand: bool,
        computed: bool,
    ):
        super().__init__(node_type="Property", loc=loc)
        self.key, self.value = key, value
        self.kind = kind
        self.method, self.shorthand, self.computed = method, shorthand, computed
class AssignmentProperty(Property):
    """A `Property` whose value is a pattern.

    The `kind` is fixed to ``"init"`` and `method` to ``False``.

    Args:
        loc: Source location, or `None` when unknown.
        key: The property key.
        value: The pattern bound to the key.
        shorthand: Stored unchanged in the `shorthand` field.
        computed: Stored unchanged in the `computed` field.
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        key: Union[Literal, Identifier],
        value: Pattern,
        shorthand: bool,
        computed: bool,
    ):
        super().__init__(
            loc=loc,
            key=key,
            value=value,
            kind="init",
            method=False,
            shorthand=shorthand,
            computed=computed,
        )
# "Patterns" block
#
# Destructuring binding and assignment are not part of ES5, but all binding positions accept Pattern
# to allow for destructuring in ES6. Nevertheless, for ES5, the only Pattern subtype is Identifier.
class Pattern(Node):
    """Base class of pattern nodes, i.e. nodes allowed in binding positions."""

    def __init__(self, node_type: str, loc: Optional[SourceLocation]):
        super().__init__(node_type=node_type, loc=loc)
class ObjectPatternKeyValue(TypedDict):
    """Typed dictionary describing one key/value entry of an `ObjectPattern`."""

    # The property key
    key: Union[Literal, Identifier]
    # The pattern bound to that key
    value: Pattern
class ObjectPattern(Pattern):
    """An object destructuring pattern.

    Args:
        loc: Source location, or `None` when unknown.
        properties: The key/value entries of the pattern.
    """

    def __init__(
        self, loc: Optional[SourceLocation], properties: List[ObjectPatternKeyValue]
    ):
        super().__init__(node_type="ObjectPattern", loc=loc)
        self.properties = properties
class ArrayPattern(Pattern):
    """An array pattern: an ordered list of element patterns.

    An element may be ``None`` (presumably a skipped position in the
    array; confirm against the ESTree specification).
    """

    def __init__(
        self,
        loc: Optional[SourceLocation],
        elements: List[Optional[Pattern]],
    ):
        super().__init__("ArrayPattern", loc)
        self.elements = elements

View File

@ -0,0 +1,149 @@
import logging
from typing import Optional, List
import antlr4.ParserRuleContext
from lex.JavaScriptParser import JavaScriptParser
from lex.JavaScriptParserListener import JavaScriptParserListener as JSBaseListener
import ast.nodes
def _get_source_location(
    ctx: antlr4.ParserRuleContext, source: Optional[str]
) -> ast.nodes.SourceLocation:
    """Internal function to obtain a `SourceLocation` from a parser context.

    Args:
        ctx: Parser rule context whose first and last tokens delimit the node.
        source: Name of the source the node comes from, or None if unknown.

    Returns:
        A source location whose start is the position of the first token and
        whose end is the position right after the last character of the last
        token.
    """
    start_token = ctx.start
    start_pos = ast.nodes.Position(start_token.line, start_token.column)

    stop_token = ctx.stop
    if stop_token is None:
        # The rule matched no tokens at all: report an empty range at the start.
        end_pos = ast.nodes.Position(start_token.line, start_token.column)
    else:
        end_line = stop_token.line
        end_column = stop_token.column

        # A token's `column` is the position of its FIRST character, so the
        # end of the node lies a whole token text further, not just one
        # character. EOF carries no source text and must not shift the end.
        if stop_token.type == antlr4.Token.EOF:
            text = ""
        else:
            text = stop_token.text or ""

        line_breaks = text.count("\n")
        if line_breaks:
            # The stop token spans several lines: the end column restarts
            # after the last line break inside the token.
            end_line += line_breaks
            end_column = len(text) - text.rfind("\n") - 1
        else:
            end_column += len(text)

        end_pos = ast.nodes.Position(end_line, end_column)

    return ast.nodes.SourceLocation(source=source, start=start_pos, end=end_pos)
class StatementListener(JSBaseListener):
    """Parse tree listener that builds the AST node of a single statement.

    The listener is driven manually: `enterStatement` dispatches to the
    handler of the concrete statement alternative, which is expected to
    store the resulting node so that it can be read through `statement`.
    Only block statements are converted so far; the other handlers are
    placeholders that just log their invocation.
    """

    _stmt: ast.nodes.Statement

    @property
    def statement(self) -> ast.nodes.Statement:
        """Statement AST node generated after parse tree walking."""
        return self._stmt

    def enterStatement(self, ctx: JavaScriptParser.StatementContext):
        """Obtain an actual statement."""
        logging.debug("Entered section Statement")
        # The first child is the concrete statement alternative; let it call
        # back into the matching `enter*` handler of this listener.
        ctx.getChild(0).enterRule(self)

    def enterBlock(self, ctx: JavaScriptParser.BlockContext):
        """Listener for BlockStatement."""
        logging.debug("Entered section Block")

        stmt_list: List[ast.nodes.Statement] = []

        # The statement list is optional in a block: for an empty block `{}`
        # the accessor returns None, which yields an empty body.
        stmt_list_ctx = ctx.statementList()
        if stmt_list_ctx is not None:
            for stmt in stmt_list_ctx.children:
                # Every nested statement gets its own listener so that the
                # node built for it does not overwrite this listener's state.
                stmt_listener = StatementListener()
                stmt.enterRule(stmt_listener)
                stmt_list.append(stmt_listener.statement)

        loc = _get_source_location(ctx, None)  # FIXME source param is None
        self._stmt = ast.nodes.BlockStatement(loc, stmt_list)

    def enterVariableDeclarationList(
        self, ctx: JavaScriptParser.VariableDeclarationListContext
    ):
        """Listener for VariableDeclaration."""
        logging.debug("Entered section VariableDeclaration")

    def enterEmptyStatement(self, ctx: JavaScriptParser.EmptyStatementContext):
        """Listener for EmptyStatement."""
        logging.debug("Entered section EmptyStatement")

    def enterExpressionStatement(
        self, ctx: JavaScriptParser.ExpressionStatementContext
    ):
        """Listener for ExpressionStatement.

        TODO: check up expression containers.
        """
        logging.debug("Entered section ExpressionStatement")

    def enterIfStatement(self, ctx: JavaScriptParser.IfStatementContext):
        """Listener for IfStatement."""
        logging.debug("Entered section IfStatement")

    def enterFunctionDeclaration(
        self, ctx: JavaScriptParser.FunctionDeclarationContext
    ):
        """Listener for FunctionDeclaration."""
        logging.debug("Entered section FunctionDeclaration")
# TODO: import/export, ClassDeclaration, iteration statements, continue, break, return
class SourceElementListener(JSBaseListener):
    """The proxy between Program and Statement.

    Collects the statement AST nodes of every source element it is entered
    with; the result is available through `source_elements`.
    """

    _elems: List[ast.nodes.Statement]

    def __init__(self):
        super().__init__()
        # The list must belong to the instance: a list assigned in the class
        # body is shared by all listeners, so elements collected for one
        # program would leak into every other one.
        self._elems = []

    @property
    def source_elements(self) -> List[ast.nodes.Statement]:
        """Source elements AST nodes generated after parse tree walking."""
        return self._elems

    def enterSourceElement(self, ctx: JavaScriptParser.SourceElementContext):
        """Convert the statement of a source element and collect its node."""
        logging.debug("Entered section Source Element")
        stmt_listener = StatementListener()
        stmt = ctx.statement()
        stmt.enterRule(stmt_listener)
        self._elems.append(stmt_listener.statement)
class ASTListener(JSBaseListener):
    """AST listener.

    Entry point of the parse tree -> AST conversion: entering the program
    rule builds the `Program` node, which is then exposed as `program_node`.
    """

    _program_node: Optional[ast.nodes.Program] = None
    _source_type: ast.nodes.SourceTypeLiteral

    @property
    def program_node(self) -> ast.nodes.Program:
        """The `Program` AST node generated after parse tree walking.

        Raises:
            ValueError: If the listener has not processed a program yet.
        """
        if self._program_node is None:
            raise ValueError("Program AST node is None, did you run the listener?")
        return self._program_node

    def __init__(self, source_type: ast.nodes.SourceTypeLiteral = "script"):
        """AST listener constructor.

        Args:
            source_type (ast.nodes.SourceTypeLiteral): source type. Could be `script` or `module`. Set to
                `script` by default.
        """
        self._source_type = source_type

    def enterProgram(self, ctx: JavaScriptParser.ProgramContext):
        """Build the `Program` node from the program parse tree context."""
        logging.debug("Entered section Program")
        logging.debug("JS source type: %s", self._source_type)

        hashbang = ctx.HashBangLine()
        if hashbang is not None:
            # Drop the leading "#!" and keep only the interpreter line.
            hashbang_exec = hashbang.getText()[2:]
            logging.debug('Found a hashbang "%s"', hashbang_exec)
            # TODO treat it somehow

        source_elem_listener = SourceElementListener()

        # Source elements are optional: an empty program has none and the
        # accessor returns None, which yields a program with an empty body.
        source_elems_ctx = ctx.sourceElements()
        if source_elems_ctx is not None:
            for elem in source_elems_ctx.children:
                elem.enterRule(source_elem_listener)

        loc = _get_source_location(ctx, None)  # FIXME add source name
        self._program_node = ast.nodes.Program(
            loc, self._source_type, source_elem_listener.source_elements
        )

110
jasminesnake/js_stream.py Normal file
View File

@ -0,0 +1,110 @@
"""A module for JavaScript code stream creation and its parsing. """
from antlr4 import InputStream, CommonTokenStream, FileStream, StdinStream
from antlr4.error.ErrorListener import ErrorListener
from .lex import JavaScriptLexer, JavaScriptParser
# Short aliases for the lexer and parser classes living inside the
# same-named modules of the `lex` package.
JSL = JavaScriptLexer.JavaScriptLexer
JSP = JavaScriptParser.JavaScriptParser
class JSBaseStream:
    """JavaScript stream base class.

    Subclasses provide the input stream; `parse` then runs the lexer and
    the parser over it and keeps both available as attributes.

    Notes:
        Do not instantiate the base class.

    See Also:
        JSFileStream
        JSStringStream
        JSStdinStream
    """

    _input_stream: InputStream = None
    _error_listener = None
    lexer = None
    parser = None

    def __init__(self, error_listener):
        """Store the error listener to be used while parsing.

        Args:
            error_listener: The custom error listener, or None to keep the default one.

        Raises:
            TypeError: If the base class itself is being instantiated.
        """
        # `self` is an instance, so it has to be checked by its exact type;
        # comparing the instance with the class itself can never match.
        if type(self) is JSBaseStream:
            raise TypeError(
                "JSBaseStream is a base class, you should instantiate its subclasses instead."
            )

        self._error_listener = error_listener

    def parse(self) -> JSP.ProgramContext:
        """Parse the stream.

        Returns:
            Program context.
        """
        self.lexer = JSL(self._input_stream)
        stream = CommonTokenStream(self.lexer)
        self.parser = JSP(stream)

        # Register error listener if present
        if self._error_listener is not None:
            # The custom listener replaces the default ones instead of
            # being added next to them.
            self.parser.removeErrorListeners()
            self.parser.addErrorListener(self._error_listener)

        return self.parser.program()
class JSStringStream(JSBaseStream):
    """JavaScript stream backed by an in-memory string.

    See Also:
        JSBaseStream
        JSFileStream
        JSStdinStream
    """

    def __init__(self, string: str, error_listener: ErrorListener = None):
        """Create a stream over JavaScript source text.

        Args:
            string (str): The JavaScript code to read.
            error_listener (ErrorListener): Custom error listener; the default one is used when this is
                omitted or None.
        """
        super().__init__(error_listener)
        source_stream = InputStream(string)
        self._input_stream = source_stream
class JSStdinStream(JSBaseStream):
    """JavaScript stream backed by the standard input.

    See Also:
        JSBaseStream
        JSFileStream
        JSStringStream
    """

    def __init__(self, error_listener: ErrorListener = None):
        """Create a stream over the standard input, decoded as UTF-8.

        Args:
            error_listener (ErrorListener): Custom error listener; the default one is used when this is
                omitted or None.
        """
        super().__init__(error_listener)
        stdin_stream = StdinStream("utf-8")
        self._input_stream = stdin_stream
class JSFileStream(JSBaseStream):
    """JavaScript file stream.

    See Also:
        JSBaseStream
        JSStringStream
        JSStdinStream
    """

    def __init__(self, path: str, error_listener: ErrorListener = None):
        """Instantiate a file stream.

        Args:
            path (str): The path to the file with JavaScript code.
            error_listener (ErrorListener): The custom error listener. Uses default one if not set or set to None.
        """
        super().__init__(error_listener)
        # The runtime's FileStream decodes as ASCII unless told otherwise,
        # which fails on any non-ASCII character in the source. Read files
        # as UTF-8, the same encoding the stdin stream uses.
        self._input_stream = FileStream(path, encoding="utf-8")

View File

@ -0,0 +1,37 @@
from antlr4.error.ErrorListener import ErrorListener
import logging
class LogErrorListener(ErrorListener):
    """ANTLR error listener that writes every report to `logging` at DEBUG level.

    The values are passed to the logging call as arguments instead of being
    formatted up front, so no message is built when DEBUG logging is off.
    """

    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
        """Log a syntax error: offending symbol, line, column, message and exception."""
        logging.debug(
            "%s\n%s\n%s\n%s\n%s", offendingSymbol, line, column, msg, e
        )

    def reportAmbiguity(
        self, recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs
    ):
        """Log the details of an ambiguity report, one value per line."""
        logging.debug(
            "%s\n%s\n%s\n%s\n%s\n%s",
            dfa,
            startIndex,
            stopIndex,
            exact,
            ambigAlts,
            configs,
        )

    def reportAttemptingFullContext(
        self, recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs
    ):
        """Log the details of an "attempting full context" report."""
        logging.debug(
            "%s; %s; %s; %s; %s",
            dfa,
            startIndex,
            stopIndex,
            conflictingAlts,
            configs,
        )

    def reportContextSensitivity(
        self, recognizer, dfa, startIndex, stopIndex, prediction, configs
    ):
        """Log the details of a context sensitivity report."""
        logging.debug(
            "%s; %s; %s; %s; %s", dfa, startIndex, stopIndex, prediction, configs
        )

View File

@ -1,4 +1,5 @@
from antlr4 import *
import logging
relativeImport = False
if __name__ is not None and "." in __name__:
@ -7,7 +8,7 @@ if __name__ is not None and "." in __name__:
class JavaScriptBaseLexer(Lexer):
def __init__(self, *args, **kwargs):
print("JavaScriptBaseLexerInit")
logging.debug("JavaScriptBaseLexerInit")
super(JavaScriptBaseLexer, self).__init__(*args, **kwargs)
"""Stores values of nested modes. By default mode is strict or

View File

@ -1,4 +1,5 @@
from antlr4 import *
import logging
relativeImport = False
if __name__ is not None and "." in __name__:

View File

@ -1,2 +1,4 @@
antlr4-python3-runtime
antlr4-python3-runtime==4.8
colorama==0.4.3
coloredlogs==14.0
tree_format==0.1.2