From c7116f9bd0471f8638b888472426e383f64cbcdc Mon Sep 17 00:00:00 2001
From: Uko Kokņevičs
Date: Sun, 18 Aug 2024 23:29:37 +0800
Subject: Some more modularisation

---
 .../src/main/java/lv/enes/orang/parser/Parser.java | 404 +++++++++++++++++++++
 .../java/lv/enes/orang/parser/ParserException.java |  10 +
 parser/src/main/java/module-info.java              |   8 +
 3 files changed, 422 insertions(+)
 create mode 100644 parser/src/main/java/lv/enes/orang/parser/Parser.java
 create mode 100644 parser/src/main/java/lv/enes/orang/parser/ParserException.java
 create mode 100644 parser/src/main/java/module-info.java

(limited to 'parser/src/main/java')

diff --git a/parser/src/main/java/lv/enes/orang/parser/Parser.java b/parser/src/main/java/lv/enes/orang/parser/Parser.java
new file mode 100644
index 0000000..6c86e85
--- /dev/null
+++ b/parser/src/main/java/lv/enes/orang/parser/Parser.java
@@ -0,0 +1,404 @@
+package lv.enes.orang.parser;
+
+import lv.enes.orang.ast.*;
+import lv.enes.orang.ast.IfElseExpression;
+import lv.enes.orang.ast.Statement;
+import lv.enes.orang.lexer.Lexer;
+import lv.enes.orang.lexer.Token;
+import lv.enes.orang.utils.NonEmptyList;
+import lv.enes.orang.utils.PeekableStream;
+
+import java.io.InputStream;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.function.Predicate;
+
+/**
+ * Recursive-descent parser that turns the tokens produced by a {@link Lexer} into an Orang AST.
+ *
+ * <p>Binding strength, loosest to tightest: infix operators (ordered among themselves by
+ * {@code BinaryExpression.Operator.bindsStrongerThan}), prefix operators, function application by
+ * juxtaposition, simple expressions.
+ */
+public class Parser {
+    /** Parses a complete program read from {@code in}. */
+    public static Program parseProgram(InputStream in) throws ParserException {
+        return new Parser(in).parseProgram();
+    }
+
+    /** Parses a complete program read from {@code in}. */
+    public static Program parseProgram(Reader in) throws ParserException {
+        return new Parser(in).parseProgram();
+    }
+
+    /** Parses a complete program read from {@code in}. */
+    public static Program parseProgram(String in) throws ParserException {
+        return new Parser(in).parseProgram();
+    }
+
+    private final PeekableStream<Token> input;
+
+    /** Creates a parser over the tokens a {@link Lexer} produces from {@code in}. */
+    public Parser(InputStream in) {
+        this(new Lexer(in));
+    }
+
+    /** Creates a parser over the tokens a {@link Lexer} produces from {@code in}. */
+    public Parser(Reader in) {
+        this(new Lexer(in));
+    }
+
+    /** Creates a parser over the tokens a {@link Lexer} produces from {@code in}. */
+    public Parser(String in) {
+        this(new Lexer(in));
+    }
+
+    /** Creates a parser that consumes the given token sequence. */
+    public Parser(Iterator<Token> input) {
+        this.input = new PeekableStream<>(input);
+    }
+
+    /**
+     * Parses statements until the {@code EOF} token; the semicolon after a statement is optional.
+     *
+     * @return the program, holding an unmodifiable list of the parsed statements
+     * @throws ParserException if the tokens do not form a valid program
+     */
+    public Program parseProgram() throws ParserException {
+        var statements = new ArrayList<Statement>();
+        while (!maybeConsumeToken(Token.Type.EOF)) {
+            statements.add(parseStatement());
+            maybeConsumeToken(Token.Type.SEMICOLON);
+        }
+        return new Program(Collections.unmodifiableList(statements));
+    }
+
+    /** Returns whether {@code token} is one of the infix operators. */
+    private static boolean isBinaryOp(Token token) {
+        return switch (token.type()) {
+            case ASTERISK, SLASH, PLUS, MINUS, QUESTION_EQUAL, SLASH_EQUAL, GREATER, GREATER_EQUAL, LESS, LESS_EQUAL
+                    -> true;
+            default -> false;
+        };
+    }
+
+    /** Maps an infix operator token to its AST operator; check {@link #isBinaryOp} first. */
+    private static BinaryExpression.Operator toBinaryOp(Token token) {
+        return switch (token.type()) {
+            case ASTERISK -> BinaryExpression.Operator.MULTIPLY;
+            case SLASH -> BinaryExpression.Operator.DIVIDE;
+            case PLUS -> BinaryExpression.Operator.ADD;
+            case MINUS -> BinaryExpression.Operator.SUBTRACT;
+            case QUESTION_EQUAL -> BinaryExpression.Operator.EQUALS;
+            case SLASH_EQUAL -> BinaryExpression.Operator.NOT_EQUALS;
+            case GREATER -> BinaryExpression.Operator.GT;
+            case GREATER_EQUAL -> BinaryExpression.Operator.GTE;
+            case LESS -> BinaryExpression.Operator.LT;
+            case LESS_EQUAL -> BinaryExpression.Operator.LTE;
+            default -> throw new IllegalStateException(STR."Token \{token.type()} is not a binary operator");
+        };
+    }
+
+    /** Returns whether {@code token} is one of the prefix operators. */
+    public static boolean isUnaryOp(Token token) {
+        return switch (token.type()) {
+            case PLUS, MINUS, BANG -> true;
+            default -> false;
+        };
+    }
+
+    /**
+     * Maps a prefix operator token to its AST operator.
+     *
+     * @throws IllegalStateException if {@code token} is not a prefix operator
+     */
+    public static UnaryExpression.Operator toUnaryOp(Token token) {
+        return switch (token.type()) {
+            case PLUS -> UnaryExpression.Operator.PLUS;
+            case MINUS -> UnaryExpression.Operator.NEGATE;
+            case BANG -> UnaryExpression.Operator.NOT;
+            default -> throw new IllegalStateException(STR."Token \{token.type()} is not a unary operator");
+        };
+    }
+
+    /** Consumes the next token and fails with {@code msg} when it does not satisfy {@code pred}. */
+    private Token consume(Predicate<Token> pred, String msg) throws ParserException {
+        var tok = input.next();
+        if (!pred.test(tok)) {
+            throw new ParserException(STR."\{msg}, got \{tok}");
+        }
+        return tok;
+    }
+
+    /** Consumes the next token, which must be of the given type. */
+    private Token consumeToken(Token.Type type) throws ParserException {
+        return consume(tok -> tok.type() == type, STR."Expected \{type}");
+    }
+
+    /** Consumes the next token only if it is of the given type; reports whether it did. */
+    private boolean maybeConsumeToken(Token.Type type) {
+        if (input.peek().type() == type) {
+            input.next();
+            return true;
+        }
+        return false;
+    }
+
+    /** Parses {@code [expr, expr, ...]}; the empty array {@code []} is allowed. */
+    private ArrayExpression parseArray() throws ParserException {
+        consumeToken(Token.Type.BRACKET_LEFT);
+        if (maybeConsumeToken(Token.Type.BRACKET_RIGHT)) {
+            return new ArrayExpression(List.of());
+        }
+
+        var items = new ArrayList<Expression>();
+        do {
+            items.add(parseExpression());
+        } while (maybeConsumeToken(Token.Type.COMMA));
+        consumeToken(Token.Type.BRACKET_RIGHT);
+
+        return new ArrayExpression(Collections.unmodifiableList(items));
+    }
+
+    /** Parses zero or more argument specs: an identifier, or {@code ()} for {@code ArgSpec.nothing()}. */
+    private List<ArgSpec> parseArgSpecs() throws ParserException {
+        var argSpecs = new ArrayList<ArgSpec>();
+        while (true) {
+            if (input.peek().type() == Token.Type.IDENTIFIER) {
+                argSpecs.add(ArgSpec.named(input.next().literal()));
+            } else if (input.peek().type() == Token.Type.PAREN_LEFT) {
+                consumeToken(Token.Type.PAREN_LEFT);
+                consumeToken(Token.Type.PAREN_RIGHT);
+                argSpecs.add(ArgSpec.nothing());
+            } else {
+                break;
+            }
+        }
+        return Collections.unmodifiableList(argSpecs);
+    }
+
+    /**
+     * Parses a chain of infix operators whose operands are prefix-operator expressions.
+     *
+     * <p>The chain is folded left to right, and an operator that binds stronger than the one before it
+     * captures only its own operands, so {@code a - b * c - d} becomes {@code (a - (b * c)) - d}.
+     */
+    private Expression parseBinaryExpression() throws ParserException {
+        var lhs = parseUnaryExpression();
+        while (isBinaryOp(input.peek())) {
+            var op = toBinaryOp(input.next());
+            lhs = new BinaryExpression(op, lhs, parseBinaryOperand(op));
+        }
+        return lhs;
+    }
+
+    /**
+     * Parses the right operand of {@code op}: one operand followed by every operator that binds
+     * stronger than {@code op}. The first operator that does not is left for the caller.
+     */
+    private Expression parseBinaryOperand(BinaryExpression.Operator op) throws ParserException {
+        var rhs = parseUnaryExpression();
+        while (isBinaryOp(input.peek()) && toBinaryOp(input.peek()).bindsStrongerThan(op)) {
+            var stronger = toBinaryOp(input.next());
+            rhs = new BinaryExpression(stronger, rhs, parseBinaryOperand(stronger));
+        }
+        return rhs;
+    }
+
+    /** Parses a {@code TRUE} or {@code FALSE} token into a boolean literal. */
+    private BooleanLiteral parseBoolean() throws ParserException {
+        var t = consume(
+                tok -> tok.type() == Token.Type.FALSE || tok.type() == Token.Type.TRUE,
+                "Expected TRUE or FALSE");
+        return new BooleanLiteral(t.type() == Token.Type.TRUE);
+    }
+
+    /** Parses function application by juxtaposition: {@code f a b} is {@code (f a) b}. */
+    private Expression parseCallExpression() throws ParserException {
+        var callee = parseSimpleExpression();
+        while (couldStartSimpleExpression(input.peek().type())) {
+            callee = new CallExpression(callee, parseSimpleExpression());
+        }
+        return callee;
+    }
+
+    /** Parses {@code def name args... = expr}. */
+    private Definition parseDefinition() throws ParserException {
+        consumeToken(Token.Type.DEF);
+        var defSpec = parseDefSpec();
+        consumeToken(Token.Type.EQUAL);
+        var value = parseExpression();
+        return new Definition(defSpec.name(), definedValue(defSpec, value));
+    }
+
+    /** Parses the name and argument specs on the left-hand side of a definition or binding. */
+    private DefSpec parseDefSpec() throws ParserException {
+        var name = consumeToken(Token.Type.IDENTIFIER).literal();
+        var argSpecs = parseArgSpecs();
+        return new DefSpec(name, argSpecs);
+    }
+
+    /** Returns {@code value} itself, or a function over the spec's arguments when it declares any. */
+    private static Expression definedValue(DefSpec defSpec, Expression value) {
+        if (defSpec.args().isEmpty()) {
+            return value;
+        }
+        return new FnExpression(new NonEmptyList<>(defSpec.args()), value);
+    }
+
+    /** Parses {@code do expr; expr; ... end}; at least one expression is required. */
+    private DoExpression parseDoExpression() throws ParserException {
+        consumeToken(Token.Type.DO);
+        var exprs = new ArrayList<Expression>();
+        do {
+            exprs.add(parseExpression());
+        } while (maybeConsumeToken(Token.Type.SEMICOLON));
+        consumeToken(Token.Type.END);
+        return new DoExpression(Collections.unmodifiableList(exprs));
+    }
+
+    /** Parses any expression; this is the entry point of the expression grammar. */
+    private Expression parseExpression() throws ParserException {
+        return parseBinaryExpression();
+    }
+
+    /** Parses {@code fn args... -> expr} or {@code fn args... do ... end}. */
+    private FnExpression parseFnExpression() throws ParserException {
+        consumeToken(Token.Type.FN);
+        var argSpecs = parseArgSpecs();
+        if (argSpecs.isEmpty()) {
+            throw new ParserException("Function definition with no arguments");
+        }
+        var body = maybeConsumeToken(Token.Type.MINUS_GREATER) ? parseExpression() : parseDoExpression();
+        return new FnExpression(new NonEmptyList<>(argSpecs), body);
+    }
+
+    /** Parses {@code if cond then expr else expr}. */
+    private IfElseExpression parseIfElseExpression() throws ParserException {
+        consumeToken(Token.Type.IF);
+        var cond = parseExpression();
+        consumeToken(Token.Type.THEN);
+        var trueBranch = parseExpression();
+        consumeToken(Token.Type.ELSE);
+        var falseBranch = parseExpression();
+        return new IfElseExpression(cond, trueBranch, falseBranch);
+    }
+
+    /**
+     * Parses an integer literal.
+     *
+     * @throws ParserException if the next token is not an integer or its value does not fit an {@code int}
+     */
+    private IntLiteral parseInteger() throws ParserException {
+        var tok = consumeToken(Token.Type.INTEGER);
+        try {
+            return new IntLiteral(Integer.parseInt(tok.literal()));
+        } catch (NumberFormatException e) {
+            // ParserException only offers a message constructor, so the cause cannot be chained here.
+            throw new ParserException(STR."Invalid integer literal \{tok.literal()}");
+        }
+    }
+
+    /** Parses {@code let name args... = expr and ... in body}. */
+    private LetInExpression parseLetInExpression() throws ParserException {
+        consumeToken(Token.Type.LET);
+        var bindings = new ArrayList<LetInExpression.Binding>();
+        do {
+            var defSpec = parseDefSpec();
+            consumeToken(Token.Type.EQUAL);
+            var value = parseExpression();
+            bindings.add(new LetInExpression.Binding(defSpec.name(), definedValue(defSpec, value)));
+        } while (maybeConsumeToken(Token.Type.AND));
+        consumeToken(Token.Type.IN);
+        var body = parseExpression();
+        return new LetInExpression(Collections.unmodifiableList(bindings), body);
+    }
+
+    /** Parses a parenthesised expression, {@code ()}, a literal, a variable, an array or a keyword-led form. */
+    private Expression parseSimpleExpression() throws ParserException {
+        return switch (input.peek().type()) {
+            case PAREN_LEFT -> {
+                consumeToken(Token.Type.PAREN_LEFT);
+                if (maybeConsumeToken(Token.Type.PAREN_RIGHT)) {
+                    yield VoidExpression.INSTANCE;
+                }
+                var expr = parseExpression();
+                consumeToken(Token.Type.PAREN_RIGHT);
+                yield expr;
+            }
+            case TRUE, FALSE -> parseBoolean();
+            case INTEGER -> parseInteger();
+            case IDENTIFIER -> new VariableExpression(input.next().literal());
+            case STRING -> parseString();
+            case BRACKET_LEFT -> parseArray();
+            case IF -> parseIfElseExpression();
+            case LET -> parseLetInExpression();
+            case FN -> parseFnExpression();
+            case DO -> parseDoExpression();
+            default -> throw new ParserException(STR."Unexpected token \{input.peek()}");
+        };
+    }
+
+    /** Returns whether {@code type} can begin a simple expression (see {@link #parseSimpleExpression}). */
+    private static boolean couldStartSimpleExpression(Token.Type type) {
+        return switch (type) {
+            case PAREN_LEFT, TRUE, FALSE, INTEGER, IDENTIFIER, STRING, BRACKET_LEFT, IF, LET, FN, DO -> true;
+            default -> false;
+        };
+    }
+
+    /** Parses a {@code def} definition or, failing that, an expression statement. */
+    private Statement parseStatement() throws ParserException {
+        if (input.peek().type() == Token.Type.DEF) {
+            return parseDefinition();
+        } else {
+            return new ExpressionStatement(parseExpression());
+        }
+    }
+
+    /**
+     * Parses a string token, decoding the escapes {@code \\}, {@code \'}, {@code \"}, {@code \r},
+     * {@code \n} and {@code \t}.
+     *
+     * @throws ParserException on an unknown escape or a backslash with nothing after it
+     */
+    private Expression parseString() throws ParserException {
+        var sb = new StringBuilder();
+        var cps = input.next().literal().codePoints().iterator();
+        while (cps.hasNext()) {
+            int cp = cps.nextInt();
+            if (cp != '\\') {
+                sb.appendCodePoint(cp);
+                continue;
+            }
+            if (!cps.hasNext()) {
+                throw new ParserException("Unterminated escape at end of string literal");
+            }
+            int escapeChar = cps.nextInt();
+            sb.append(switch (escapeChar) {
+                case '\\' -> '\\';
+                case '\'' -> '\'';
+                case '"' -> '"';
+                case 'r' -> '\r';
+                case 'n' -> '\n';
+                case 't' -> '\t';
+                // Character.toString(int) renders the offending character rather than its code point number.
+                default -> throw new ParserException(
+                        STR."Unknown string escape '\\\{Character.toString(escapeChar)}'");
+            });
+        }
+        return new StringLiteral(sb.toString());
+    }
+
+    /** Parses zero or more prefix operators applied to a call expression: {@code -f x} is {@code -(f x)}. */
+    private Expression parseUnaryExpression() throws ParserException {
+        if (!isUnaryOp(input.peek())) {
+            return parseCallExpression();
+        }
+        var op = toUnaryOp(input.next());
+        return new UnaryExpression(op, parseUnaryExpression());
+    }
+}
diff --git a/parser/src/main/java/lv/enes/orang/parser/ParserException.java b/parser/src/main/java/lv/enes/orang/parser/ParserException.java
new file mode 100644
index 0000000..632ce15
--- /dev/null
+++ b/parser/src/main/java/lv/enes/orang/parser/ParserException.java
@@ -0,0 +1,10 @@
+package lv.enes.orang.parser;
+
+import lv.enes.orang.core.OrangException;
+
+/** Thrown by {@link Parser} when the token stream does not form a valid program. */
+public class ParserException extends OrangException {
+    public ParserException(String message) {
+        super(message);
+    }
+}
diff --git a/parser/src/main/java/module-info.java b/parser/src/main/java/module-info.java
new file mode 100644
index 0000000..43c2dc5
--- /dev/null
+++ b/parser/src/main/java/module-info.java
@@ -0,0 +1,8 @@
+module lv.enes.orang.parser {
+    exports lv.enes.orang.parser;
+
+    requires lv.enes.orang.ast;
+    requires lv.enes.orang.core;
+    requires lv.enes.orang.lexer;
+    requires lv.enes.orang.utils;
+}
-- 
cgit v1.2.3