From c7116f9bd0471f8638b888472426e383f64cbcdc Mon Sep 17 00:00:00 2001 From: Uko Kokņevičs Date: Sun, 18 Aug 2024 23:29:37 +0800 Subject: Some more modularisation --- lexer/build.gradle.kts | 19 ++ lexer/src/main/java/lv/enes/orang/lexer/Lexer.java | 197 ++++++++++++ lexer/src/main/java/lv/enes/orang/lexer/Token.java | 62 ++++ lexer/src/main/java/module-info.java | 5 + orang/build.gradle.kts | 1 + orang/src/main/java/lv/enes/orang/Codepoint.java | 28 -- orang/src/main/java/lv/enes/orang/Lexer.java | 174 ----------- orang/src/main/java/lv/enes/orang/Main.java | 1 + orang/src/main/java/lv/enes/orang/Parser.java | 296 ------------------ .../main/java/lv/enes/orang/ParserException.java | 9 - .../main/java/lv/enes/orang/PeekableStream.java | 38 --- orang/src/main/java/lv/enes/orang/Token.java | 15 - orang/src/main/java/lv/enes/orang/TokenType.java | 91 ------ orang/src/main/java/module-info.java | 1 + parser/build.gradle.kts | 22 ++ .../src/main/java/lv/enes/orang/parser/Parser.java | 339 +++++++++++++++++++++ .../java/lv/enes/orang/parser/ParserException.java | 9 + parser/src/main/java/module-info.java | 8 + settings.gradle.kts | 2 +- .../main/java/lv/enes/orang/utils/Codepoint.java | 8 + .../java/lv/enes/orang/utils/PeekableStream.java | 38 +++ 21 files changed, 711 insertions(+), 652 deletions(-) create mode 100644 lexer/build.gradle.kts create mode 100644 lexer/src/main/java/lv/enes/orang/lexer/Lexer.java create mode 100644 lexer/src/main/java/lv/enes/orang/lexer/Token.java create mode 100644 lexer/src/main/java/module-info.java delete mode 100644 orang/src/main/java/lv/enes/orang/Codepoint.java delete mode 100644 orang/src/main/java/lv/enes/orang/Lexer.java delete mode 100644 orang/src/main/java/lv/enes/orang/Parser.java delete mode 100644 orang/src/main/java/lv/enes/orang/ParserException.java delete mode 100644 orang/src/main/java/lv/enes/orang/PeekableStream.java delete mode 100644 orang/src/main/java/lv/enes/orang/Token.java delete mode 100644 
orang/src/main/java/lv/enes/orang/TokenType.java create mode 100644 parser/build.gradle.kts create mode 100644 parser/src/main/java/lv/enes/orang/parser/Parser.java create mode 100644 parser/src/main/java/lv/enes/orang/parser/ParserException.java create mode 100644 parser/src/main/java/module-info.java create mode 100644 utils/src/main/java/lv/enes/orang/utils/Codepoint.java create mode 100644 utils/src/main/java/lv/enes/orang/utils/PeekableStream.java diff --git a/lexer/build.gradle.kts b/lexer/build.gradle.kts new file mode 100644 index 0000000..7fe8777 --- /dev/null +++ b/lexer/build.gradle.kts @@ -0,0 +1,19 @@ +plugins { + java +} + +dependencies { + implementation(project(":utils")) +} + +java { + sourceCompatibility = JavaVersion.VERSION_22 + targetCompatibility = JavaVersion.VERSION_22 + toolchain { + languageVersion = JavaLanguageVersion.of(22) + } +} + +tasks.withType { + options.compilerArgs.add("--enable-preview") +} \ No newline at end of file diff --git a/lexer/src/main/java/lv/enes/orang/lexer/Lexer.java b/lexer/src/main/java/lv/enes/orang/lexer/Lexer.java new file mode 100644 index 0000000..8fec98e --- /dev/null +++ b/lexer/src/main/java/lv/enes/orang/lexer/Lexer.java @@ -0,0 +1,197 @@ +package lv.enes.orang.lexer; + +import lv.enes.orang.utils.Codepoint; +import lv.enes.orang.utils.PeekableStream; + +import java.io.*; +import java.util.Iterator; +import java.util.function.BiFunction; +import java.util.function.Predicate; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +public class Lexer implements Iterator { + public static boolean isIdentInitial(Codepoint cp) { + return Character.isLetter(cp.cp()) || cp.cp() == '_'; + } + + public static boolean isIdentFinal(Codepoint cp) { + return isIdentInitial(cp) || Character.isDigit(cp.cp()); + } + + public static boolean isNewline(Codepoint cp) { + return cp.cp() == '\n'; + } + + public static boolean isNumeral(Codepoint cp) { + return Character.isDigit(cp.cp()); + } + + public static 
boolean isWhitespace(Codepoint cp) { + return Character.isWhitespace(cp.cp()); + } + + private final PeekableStream input; + + public Lexer(InputStream input) { + this(new InputStreamReader(input)); + } + + public Lexer(Reader input) { + var cpStream = new BufferedReader(input) + .lines() + .flatMapToInt(str -> IntStream.concat(str.codePoints(), IntStream.of('\n'))) + .mapToObj(Codepoint::new); + var theEof = Stream.of(new Codepoint(-1)); + this.input = new PeekableStream<>(Stream.concat(cpStream, theEof).iterator()); + } + + public Lexer(String input) { + this(new StringReader(input)); + } + + private boolean hasNext = true; + + @Override + public Token next() { + var tok = nextToken(); + if (tok.type() == Token.Type.EOF) { + hasNext = false; + } + return tok; + } + + @Override + public boolean hasNext() { + return hasNext; + } + + private Token nextToken() { + skipWhitespace(); + return switch (input.peek().cp()) { + case -1 -> new Token(Token.Type.EOF, ""); + + case '*' -> new Token(Token.Type.ASTERISK, input.next()); + case '!' 
-> new Token(Token.Type.BANG, input.next()); + case '[' -> new Token(Token.Type.BRACKET_LEFT, input.next()); + case ']' -> new Token(Token.Type.BRACKET_RIGHT, input.next()); + case ',' -> new Token(Token.Type.COMMA, input.next()); + case '=' -> new Token(Token.Type.EQUAL, input.next()); + case '>' -> { + var first = input.next(); + if (input.peek().cp() == '=') { + yield new Token(Token.Type.GREATER_EQUAL, first, input.next()); + } else { + yield new Token(Token.Type.GREATER, first); + } + } + case '<' -> { + var first = input.next(); + if (input.peek().cp() == '=') { + yield new Token(Token.Type.LESS_EQUAL, first, input.next()); + } else { + yield new Token(Token.Type.LESS, first); + } + } + case '-' -> { + var first = input.next(); + if (input.peek().cp() == '>') { + yield new Token(Token.Type.MINUS_GREATER, first, input.next()); + } else { + yield new Token(Token.Type.MINUS, first); + } + } + case '(' -> new Token(Token.Type.PAREN_LEFT, input.next()); + case ')' -> new Token(Token.Type.PAREN_RIGHT, input.next()); + case '+' -> new Token(Token.Type.PLUS, input.next()); + case '?' 
-> { + var first = input.next(); + if (input.peek().cp() == '=') { + yield new Token(Token.Type.QUESTION_EQUAL, first, input.next()); + } else { + yield new Token(Token.Type.ILLEGAL, first); // lone '?': keep the lookahead for the next token, like the other operators + } + } + case ';' -> new Token(Token.Type.SEMICOLON, input.next()); + case '/' -> { + var first = input.next(); + if (input.peek().cp() == '=') { + yield new Token(Token.Type.SLASH_EQUAL, first, input.next()); + } else { + yield new Token(Token.Type.SLASH, first); + } + } + + case '"' -> new Token(Token.Type.STRING, readString()); + + default -> { + if (isIdentInitial(input.peek())) { + var ident = readIdentifier(); + var type = switch (ident) { + case "and" -> Token.Type.AND; + case "def" -> Token.Type.DEF; + case "do" -> Token.Type.DO; + case "else" -> Token.Type.ELSE; + case "end" -> Token.Type.END; + case "false" -> Token.Type.FALSE; + case "fn" -> Token.Type.FN; + case "if" -> Token.Type.IF; + case "in" -> Token.Type.IN; + case "let" -> Token.Type.LET; + case "then" -> Token.Type.THEN; + case "true" -> Token.Type.TRUE; + default -> Token.Type.IDENTIFIER; + }; + yield new Token(type, ident); + } else if (isNumeral(input.peek())) { + yield new Token(Token.Type.INTEGER, readInteger()); + } else { + yield new Token(Token.Type.ILLEGAL, input.next()); + } + } + }; + } + + private T foldWhile(Predicate pred, T initial, BiFunction combine) { + var res = initial; + var ch = input.peek(); + while (pred.test(ch)) { + res = combine.apply(res, input.next()); + ch = input.peek(); + } + return res; + } + + private String readWhile(Predicate pred) { + return foldWhile(pred, new StringBuilder(), StringBuilder::append).toString(); + } + + private void skipWhile(Predicate pred) { + foldWhile(pred, Object.class, (x, _) -> x); + } + + private String readIdentifier() { + return readWhile(Lexer::isIdentFinal); + } + + private String readInteger() { + return readWhile(Lexer::isNumeral); + } + + private String readString() { + input.next(); + var literal = readWhile(cp -> cp.cp() != 
'"'); + input.next(); + return literal; + } + + private void skipWhitespace() { + while (true) { + skipWhile(Lexer::isWhitespace); + if (input.peek().cp() != '#') { + return; + } + skipWhile(cp -> !isNewline(cp)); + } + } +} diff --git a/lexer/src/main/java/lv/enes/orang/lexer/Token.java b/lexer/src/main/java/lv/enes/orang/lexer/Token.java new file mode 100644 index 0000000..59626c7 --- /dev/null +++ b/lexer/src/main/java/lv/enes/orang/lexer/Token.java @@ -0,0 +1,62 @@ +package lv.enes.orang.lexer; + +import lv.enes.orang.utils.Codepoint; + +public record Token(Type type, String literal) { + public Token(Type type, Codepoint... cps) { + this(type, codepointsToString(cps)); + } + + private static String codepointsToString(Codepoint... cps) { + var sb = new StringBuilder(cps.length); + for (var cp : cps) { + sb.append(cp); + } + return sb.toString(); + } + + public enum Type { + ILLEGAL, + EOF, + + // Literals + IDENTIFIER, + INTEGER, + STRING, + + // Keywords + AND, + DEF, + DO, + ELSE, + END, + FALSE, + FN, + IF, + IN, + LET, + THEN, + TRUE, + + // Special chars + ASTERISK, + BANG, + BRACKET_LEFT, + BRACKET_RIGHT, + COMMA, + EQUAL, + GREATER, + GREATER_EQUAL, + LESS, + LESS_EQUAL, + MINUS, + MINUS_GREATER, + PAREN_LEFT, + PAREN_RIGHT, + PLUS, + QUESTION_EQUAL, + SEMICOLON, + SLASH, + SLASH_EQUAL, + } +} diff --git a/lexer/src/main/java/module-info.java b/lexer/src/main/java/module-info.java new file mode 100644 index 0000000..a57a694 --- /dev/null +++ b/lexer/src/main/java/module-info.java @@ -0,0 +1,5 @@ +module lv.enes.orang.lexer { + exports lv.enes.orang.lexer; + + requires lv.enes.orang.utils; +} \ No newline at end of file diff --git a/orang/build.gradle.kts b/orang/build.gradle.kts index f70609e..ef78ded 100644 --- a/orang/build.gradle.kts +++ b/orang/build.gradle.kts @@ -24,6 +24,7 @@ dependencies { implementation(project(":ast")) implementation(project(":core")) + implementation(project(":parser")) implementation(project(":utils")) } diff --git 
a/orang/src/main/java/lv/enes/orang/Codepoint.java b/orang/src/main/java/lv/enes/orang/Codepoint.java deleted file mode 100644 index 7157062..0000000 --- a/orang/src/main/java/lv/enes/orang/Codepoint.java +++ /dev/null @@ -1,28 +0,0 @@ -package lv.enes.orang; - -public record Codepoint(int cp) { - @Override - public String toString() { - return Character.toString(cp); - } - - public boolean isIdentInitial() { - return Character.isLetter(cp) || cp == '_'; - } - - public boolean isIdentFinal() { - return isIdentInitial() || Character.isDigit(cp); - } - - public boolean isNewline() { - return cp == '\n'; - } - - public boolean isNumeral() { - return Character.isDigit(cp); - } - - public boolean isWhitespace() { - return Character.isWhitespace(cp); - } -} diff --git a/orang/src/main/java/lv/enes/orang/Lexer.java b/orang/src/main/java/lv/enes/orang/Lexer.java deleted file mode 100644 index d4e1533..0000000 --- a/orang/src/main/java/lv/enes/orang/Lexer.java +++ /dev/null @@ -1,174 +0,0 @@ -package lv.enes.orang; - -import java.io.*; -import java.util.Iterator; -import java.util.function.BiFunction; -import java.util.function.Predicate; -import java.util.stream.IntStream; -import java.util.stream.Stream; - -public class Lexer implements Iterator { - private final PeekableStream input; - - public Lexer(InputStream input) { - this(new InputStreamReader(input)); - } - - public Lexer(Reader input) { - var cpStream = new BufferedReader(input) - .lines() - .flatMapToInt(str -> IntStream.concat(str.codePoints(), IntStream.of('\n'))) - .mapToObj(Codepoint::new); - var theEof = Stream.of(new Codepoint(-1)); - this.input = new PeekableStream<>(Stream.concat(cpStream, theEof).iterator()); - } - - public Lexer(String input) { - this(new StringReader(input)); - } - - private boolean hasNext = true; - - @Override - public Token next() { - var tok = nextToken(); - if (tok.type() == TokenType.EOF) { - hasNext = false; - } - return tok; - } - - @Override - public boolean hasNext() { - 
return hasNext; - } - - private Token nextToken() { - skipWhitespace(); - return switch (input.peek().cp()) { - case -1 -> new Token(TokenType.EOF, ""); - - case '*' -> new Token(TokenType.ASTERISK, input.next()); - case '!' -> new Token(TokenType.BANG, input.next()); - case '[' -> new Token(TokenType.BRACKET_LEFT, input.next()); - case ']' -> new Token(TokenType.BRACKET_RIGHT, input.next()); - case ',' -> new Token(TokenType.COMMA, input.next()); - case '=' -> new Token(TokenType.EQUAL, input.next()); - case '>' -> { - var first = input.next(); - if (input.peek().cp() == '=') { - yield new Token(TokenType.GREATER_EQUAL, first, input.next()); - } else { - yield new Token(TokenType.GREATER, first); - } - } - case '<' -> { - var first = input.next(); - if (input.peek().cp() == '=') { - yield new Token(TokenType.LESS_EQUAL, first, input.next()); - } else { - yield new Token(TokenType.LESS, first); - } - } - case '-' -> { - var first = input.next(); - if (input.peek().cp() == '>') { - yield new Token(TokenType.MINUS_GREATER, first, input.next()); - } else { - yield new Token(TokenType.MINUS, first); - } - } - case '(' -> new Token(TokenType.PAREN_LEFT, input.next()); - case ')' -> new Token(TokenType.PAREN_RIGHT, input.next()); - case '+' -> new Token(TokenType.PLUS, input.next()); - case '?' 
-> { - var first = input.next(); - if (input.peek().cp() == '=') { - yield new Token(TokenType.QUESTION_EQUAL, first, input.next()); - } else { - yield new Token(TokenType.ILLEGAL, first, input.next()); - } - } - case ';' -> new Token(TokenType.SEMICOLON, input.next()); - case '/' -> { - var first = input.next(); - if (input.peek().cp() == '=') { - yield new Token(TokenType.SLASH_EQUAL, first, input.next()); - } else { - yield new Token(TokenType.SLASH, first); - } - } - - case '"' -> new Token(TokenType.STRING, readString()); - - default -> { - if (input.peek().isIdentInitial()) { - var ident = readIdentifier(); - var type = switch (ident) { - case "and" -> TokenType.AND; - case "def" -> TokenType.DEF; - case "do" -> TokenType.DO; - case "else" -> TokenType.ELSE; - case "end" -> TokenType.END; - case "false" -> TokenType.FALSE; - case "fn" -> TokenType.FN; - case "if" -> TokenType.IF; - case "in" -> TokenType.IN; - case "let" -> TokenType.LET; - case "then" -> TokenType.THEN; - case "true" -> TokenType.TRUE; - default -> TokenType.IDENTIFIER; - }; - yield new Token(type, ident); - } else if (input.peek().isNumeral()) { - yield new Token(TokenType.INTEGER, readInteger()); - } else { - yield new Token(TokenType.ILLEGAL, input.next()); - } - } - }; - } - - private T foldWhile(Predicate pred, T initial, BiFunction combine) { - var res = initial; - var ch = input.peek(); - while (pred.test(ch)) { - res = combine.apply(res, input.next()); - ch = input.peek(); - } - return res; - } - - private String readWhile(Predicate pred) { - return foldWhile(pred, new StringBuilder(), StringBuilder::append).toString(); - } - - private void skipWhile(Predicate pred) { - foldWhile(pred, Object.class, (x, _) -> x); - } - - private String readIdentifier() { - return readWhile(Codepoint::isIdentFinal); - } - - private String readInteger() { - return readWhile(Codepoint::isNumeral); - } - - private String readString() { - input.next(); - var literal = readWhile(cp -> cp.cp() != '"'); - 
input.next(); - return literal; - } - - private void skipWhitespace() { - while (true) { - skipWhile(Codepoint::isWhitespace); - if (input.peek().cp() != '#') { - return; - } - skipWhile(cp -> !cp.isNewline()); - } - } -} diff --git a/orang/src/main/java/lv/enes/orang/Main.java b/orang/src/main/java/lv/enes/orang/Main.java index 7ca14a1..eb4bfcc 100644 --- a/orang/src/main/java/lv/enes/orang/Main.java +++ b/orang/src/main/java/lv/enes/orang/Main.java @@ -1,6 +1,7 @@ package lv.enes.orang; import lv.enes.orang.core.OrangException; +import lv.enes.orang.parser.Parser; import java.io.FileReader; import java.io.IOException; diff --git a/orang/src/main/java/lv/enes/orang/Parser.java b/orang/src/main/java/lv/enes/orang/Parser.java deleted file mode 100644 index 77abe24..0000000 --- a/orang/src/main/java/lv/enes/orang/Parser.java +++ /dev/null @@ -1,296 +0,0 @@ -package lv.enes.orang; - -import lv.enes.orang.ast.*; -import lv.enes.orang.ast.IfElseExpression; -import lv.enes.orang.ast.Statement; -import lv.enes.orang.utils.NonEmptyList; - -import java.io.InputStream; -import java.io.Reader; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.function.Predicate; - -public class Parser { - public static Program parseProgram(InputStream in) throws ParserException { - var parser = new Parser(in); - return parser.parseProgram(); - } - - public static Program parseProgram(Reader in) throws ParserException { - var parser = new Parser(in); - return parser.parseProgram(); - } - - public static Program parseProgram(String in) throws ParserException { - var parser = new Parser(in); - return parser.parseProgram(); - } - - private final PeekableStream input; - - public Parser(InputStream in) { - this(new Lexer(in)); - } - - public Parser(Reader in) { - this(new Lexer(in)); - } - - public Parser(String in) { - this(new Lexer(in)); - } - - public Parser(Iterator input) { - this.input = new 
PeekableStream<>(input); - } - - public Program parseProgram() throws ParserException { - var statements = new ArrayList(); - while (!maybeConsumeToken(TokenType.EOF)) { - statements.add(parseStatement()); - maybeConsumeToken(TokenType.SEMICOLON); - } - return new Program(Collections.unmodifiableList(statements)); - } - - private Token consume(Predicate pred, String msg) throws ParserException { - var tok = input.next(); - if (!pred.test(tok)) { - throw new ParserException(STR."\{msg}, got \{tok}"); - } - return tok; - } - - private Token consumeToken(TokenType type) throws ParserException { - return consume(tok -> tok.type() == type, STR."Expected \{type}"); - } - - private boolean maybeConsumeToken(TokenType type) { - if (input.peek().type() == type) { - input.next(); - return true; - } - return false; - } - - private ArrayExpression parseArray() throws ParserException { - consumeToken(TokenType.BRACKET_LEFT); - if (maybeConsumeToken(TokenType.BRACKET_RIGHT)) { - return new ArrayExpression(List.of()); - } - - var items = new ArrayList(); - do { - items.add(parseExpression()); - } while (maybeConsumeToken(TokenType.COMMA)); - consumeToken(TokenType.BRACKET_RIGHT); - - return new ArrayExpression(Collections.unmodifiableList(items)); - } - - private List parseArgSpecs() throws ParserException { - var argSpecs = new ArrayList(); - while (true) { - if (input.peek().type() == TokenType.IDENTIFIER) { - argSpecs.add(ArgSpec.named(input.next().literal())); - } else if (input.peek().type() == TokenType.PAREN_LEFT) { - consumeToken(TokenType.PAREN_LEFT); - consumeToken(TokenType.PAREN_RIGHT); - argSpecs.add(ArgSpec.nothing()); - } else { - break; - } - } - return Collections.unmodifiableList(argSpecs); - } - - private Expression parseBinaryExpression() throws ParserException { - var lhs = parseCallExpression(); - if (!input.peek().type().isBinaryOp()) { - return lhs; - } - - return parseBinaryExpressionRhs(lhs, input.next().type().toBinaryOp()); - } - - private Expression 
parseBinaryExpressionRhs(Expression lhs, BinaryExpression.Operator op) throws ParserException { - var rhs = parseCallExpression(); - if (!input.peek().type().isBinaryOp()) { - return new BinaryExpression(op, lhs, rhs); - } - - var op2 = input.next().type().toBinaryOp(); - if (op2.bindsStrongerThan(op)) { - return new BinaryExpression(op, lhs, parseBinaryExpressionRhs(rhs, op2)); - } else { - return parseBinaryExpressionRhs(new BinaryExpression(op, lhs, rhs), op2); - } - } - - private BooleanLiteral parseBoolean() throws ParserException { - var t = consume(tok -> tok.type() == TokenType.FALSE || tok.type() == TokenType.TRUE, "Expected TRUE or FALSE"); - return new BooleanLiteral(t.type() == TokenType.TRUE); - } - - private Expression parseCallExpression() throws ParserException { - var callee = parseSimpleExpression(); - while (couldStartSimpleExpression(input.peek().type())) { - var arg = parseSimpleExpression(); - callee = new CallExpression(callee, arg); - } - return callee; - } - - private Definition parseDefinition() throws ParserException { - consumeToken(TokenType.DEF); - var defSpec = parseDefSpec(); - consumeToken(TokenType.EQUAL); - var value = parseExpression(); - if (defSpec.args().isEmpty()) { - return new Definition(defSpec.name(), value); - } else { - return new Definition(defSpec.name(), new FnExpression(new NonEmptyList<>(defSpec.args()), value)); - } - } - - private DefSpec parseDefSpec() throws ParserException { - var name = consumeToken(TokenType.IDENTIFIER).literal(); - var argSpecs = parseArgSpecs(); - return new DefSpec(name, argSpecs); - } - - private DoExpression parseDoExpression() throws ParserException { - consumeToken(TokenType.DO); - var exprs = new ArrayList(); - do { - exprs.add(parseExpression()); - } while (maybeConsumeToken(TokenType.SEMICOLON)); - consumeToken(TokenType.END); - return new DoExpression(Collections.unmodifiableList(exprs)); - } - - private Expression parseExpression() throws ParserException { - if 
(input.peek().type().isUnaryOp()) { - return parseUnaryExpression(); - } - return parseBinaryExpression(); - } - - private FnExpression parseFnExpression() throws ParserException { - consumeToken(TokenType.FN); - var argSpecs = parseArgSpecs(); - if (argSpecs.isEmpty()) { - throw new ParserException("Function definition with no arguments"); - } - var body = maybeConsumeToken(TokenType.MINUS_GREATER) ? parseExpression() : parseDoExpression(); - return new FnExpression(new NonEmptyList<>(argSpecs), body); - } - - private IfElseExpression parseIfElseExpression() throws ParserException { - consumeToken(TokenType.IF); - var cond = parseExpression(); - consumeToken(TokenType.THEN); - var trueBranch = parseExpression(); - consumeToken(TokenType.ELSE); - var falseBranch = parseExpression(); - return new IfElseExpression(cond, trueBranch, falseBranch); - } - - private IntLiteral parseInteger() throws ParserException { - var tok = consumeToken(TokenType.INTEGER); - return new IntLiteral(Integer.parseInt(tok.literal())); - } - - private LetInExpression parseLetInExpression() throws ParserException { - consumeToken(TokenType.LET); - var bindings = new ArrayList(); - do { - var defSpec = parseDefSpec(); - consumeToken(TokenType.EQUAL); - var value = parseExpression(); - if (defSpec.args().isEmpty()) { - bindings.add(new LetInExpression.Binding(defSpec.name(), value)); - } else { - var fn = new FnExpression(new NonEmptyList<>(defSpec.args()), value); - bindings.add(new LetInExpression.Binding(defSpec.name(), fn)); - } - } while (maybeConsumeToken(TokenType.AND)); - consumeToken(TokenType.IN); - var body = parseExpression(); - return new LetInExpression(Collections.unmodifiableList(bindings), body); - } - - private Expression parseSimpleExpression() throws ParserException { - return switch (input.peek().type()) { - case PAREN_LEFT -> { - consumeToken(TokenType.PAREN_LEFT); - if (maybeConsumeToken(TokenType.PAREN_RIGHT)) { - yield VoidExpression.INSTANCE; - } - var expr = 
parseExpression(); - consumeToken(TokenType.PAREN_RIGHT); - yield expr; - } - case TRUE, FALSE -> parseBoolean(); - case INTEGER -> parseInteger(); - case IDENTIFIER -> new VariableExpression(input.next().literal()); - case STRING -> parseString(); - case BRACKET_LEFT -> parseArray(); - case IF -> parseIfElseExpression(); - case LET -> parseLetInExpression(); - case FN -> parseFnExpression(); - case DO -> parseDoExpression(); - default -> throw new ParserException(STR."Unexpected token \{input.peek()}"); - }; - } - - private boolean couldStartSimpleExpression(TokenType type) { - return switch (type) { - case PAREN_LEFT, TRUE, FALSE, INTEGER, IDENTIFIER, STRING, BRACKET_LEFT, IF, LET, FN, DO -> true; - default -> false; - }; - } - - private Statement parseStatement() throws ParserException { - if (input.peek().type() == TokenType.DEF) { - return parseDefinition(); - } else { - return new ExpressionStatement(parseExpression()); - } - } - - private Expression parseString() throws ParserException { - var sb = new StringBuilder(); - var cps = input.next().literal().codePoints().iterator(); - while (cps.hasNext()) { - var cp = cps.next(); - if (cp == '\\') { - var escapeChar = cps.next(); - //noinspection UnnecessaryUnboxing - sb.append(switch (escapeChar.intValue()) { - case '\'' -> '\''; - case '"' -> '"'; - case 'r' -> '\r'; - case 'n' -> '\n'; - case 't' -> '\t'; - default -> throw new ParserException(STR."Unknown string escape '\\\{escapeChar}'"); - }); - } else { - sb.appendCodePoint(cp); - } - } - return new StringLiteral(sb.toString()); - } - - private Expression parseUnaryExpression() throws ParserException { - if (input.peek().type().isUnaryOp()) { - var op = input.next().type().toUnaryOp(); - return new UnaryExpression(op, parseUnaryExpression()); - } else { - return parseSimpleExpression(); - } - } -} diff --git a/orang/src/main/java/lv/enes/orang/ParserException.java b/orang/src/main/java/lv/enes/orang/ParserException.java deleted file mode 100644 index 
bd65e7a..0000000 --- a/orang/src/main/java/lv/enes/orang/ParserException.java +++ /dev/null @@ -1,9 +0,0 @@ -package lv.enes.orang; - -import lv.enes.orang.core.OrangException; - -public class ParserException extends OrangException { - public ParserException(String message) { - super(message); - } -} diff --git a/orang/src/main/java/lv/enes/orang/PeekableStream.java b/orang/src/main/java/lv/enes/orang/PeekableStream.java deleted file mode 100644 index b77bab1..0000000 --- a/orang/src/main/java/lv/enes/orang/PeekableStream.java +++ /dev/null @@ -1,38 +0,0 @@ -package lv.enes.orang; - -import java.util.ArrayDeque; -import java.util.Deque; -import java.util.Iterator; - -public class PeekableStream implements Iterator { - private final Iterator input; - private final Deque buffer = new ArrayDeque<>(); - - public PeekableStream(Iterator input) { - this.input = input; - } - - @Override - public boolean hasNext() { - return !buffer.isEmpty() || input.hasNext(); - } - - @Override - public T next() { - if (!buffer.isEmpty()) { - return buffer.pop(); - } else { - return input.next(); - } - } - - public T peek() { - var value = next(); - putBack(value); - return value; - } - - public void putBack(T value) { - buffer.push(value); - } -} diff --git a/orang/src/main/java/lv/enes/orang/Token.java b/orang/src/main/java/lv/enes/orang/Token.java deleted file mode 100644 index 4456b8f..0000000 --- a/orang/src/main/java/lv/enes/orang/Token.java +++ /dev/null @@ -1,15 +0,0 @@ -package lv.enes.orang; - -public record Token(TokenType type, String literal) { - public Token(TokenType type, Codepoint... cps) { - this(type, codepointsToString(cps)); - } - - private static String codepointsToString(Codepoint... 
cps) { - var sb = new StringBuilder(cps.length); - for (var cp : cps) { - sb.append(cp); - } - return sb.toString(); - } -} diff --git a/orang/src/main/java/lv/enes/orang/TokenType.java b/orang/src/main/java/lv/enes/orang/TokenType.java deleted file mode 100644 index 960435e..0000000 --- a/orang/src/main/java/lv/enes/orang/TokenType.java +++ /dev/null @@ -1,91 +0,0 @@ -package lv.enes.orang; - -import lv.enes.orang.ast.BinaryExpression; -import lv.enes.orang.ast.UnaryExpression; - -public enum TokenType { - ILLEGAL, - EOF, - - // Literals - IDENTIFIER, - INTEGER, - STRING, - - // Keywords - AND, - DEF, - DO, - ELSE, - END, - FALSE, - FN, - IF, - IN, - LET, - THEN, - TRUE, - - // Special chars - ASTERISK, - BANG, - BRACKET_LEFT, - BRACKET_RIGHT, - COMMA, - EQUAL, - GREATER, - GREATER_EQUAL, - LESS, - LESS_EQUAL, - MINUS, - MINUS_GREATER, - PAREN_LEFT, - PAREN_RIGHT, - PLUS, - QUESTION_EQUAL, - SEMICOLON, - SLASH, - SLASH_EQUAL, - - ; - - public boolean isBinaryOp() { - return switch (this) { - case ASTERISK, SLASH, PLUS, MINUS, QUESTION_EQUAL, SLASH_EQUAL, GREATER, GREATER_EQUAL, LESS, LESS_EQUAL - -> true; - default -> false; - }; - } - - public BinaryExpression.Operator toBinaryOp() { - return switch (this) { - case ASTERISK -> BinaryExpression.Operator.MULTIPLY; - case SLASH -> BinaryExpression.Operator.DIVIDE; - case PLUS -> BinaryExpression.Operator.ADD; - case MINUS -> BinaryExpression.Operator.SUBTRACT; - case QUESTION_EQUAL -> BinaryExpression.Operator.EQUALS; - case SLASH_EQUAL -> BinaryExpression.Operator.NOT_EQUALS; - case GREATER -> BinaryExpression.Operator.GT; - case GREATER_EQUAL -> BinaryExpression.Operator.GTE; - case LESS -> BinaryExpression.Operator.LT; - case LESS_EQUAL -> BinaryExpression.Operator.LTE; - default -> throw new IllegalStateException("Token " + this + " is not a binary operator"); - }; - } - - public boolean isUnaryOp() { - return switch (this) { - case PLUS, MINUS, BANG -> true; - default -> false; - }; - } - - public 
UnaryExpression.Operator toUnaryOp() { - return switch (this) { - case PLUS -> UnaryExpression.Operator.PLUS; - case MINUS -> UnaryExpression.Operator.NEGATE; - case BANG -> UnaryExpression.Operator.NOT; - default -> throw new IllegalStateException("Token " + this + " is not a unary operator"); - }; - } -} diff --git a/orang/src/main/java/module-info.java b/orang/src/main/java/module-info.java index 060657f..353d0dd 100644 --- a/orang/src/main/java/module-info.java +++ b/orang/src/main/java/module-info.java @@ -4,6 +4,7 @@ module lv.enes.orang { requires lv.enes.orang.ast; requires lv.enes.orang.core; + requires lv.enes.orang.parser; requires lv.enes.orang.utils; requires static jakarta.annotation; diff --git a/parser/build.gradle.kts b/parser/build.gradle.kts new file mode 100644 index 0000000..9934d9d --- /dev/null +++ b/parser/build.gradle.kts @@ -0,0 +1,22 @@ +plugins { + java +} + +dependencies { + implementation(project(":ast")) + implementation(project(":core")) + implementation(project(":lexer")) + implementation(project(":utils")) +} + +java { + sourceCompatibility = JavaVersion.VERSION_22 + targetCompatibility = JavaVersion.VERSION_22 + toolchain { + languageVersion = JavaLanguageVersion.of(22) + } +} + +tasks.withType { + options.compilerArgs.add("--enable-preview") +} \ No newline at end of file diff --git a/parser/src/main/java/lv/enes/orang/parser/Parser.java b/parser/src/main/java/lv/enes/orang/parser/Parser.java new file mode 100644 index 0000000..6c86e85 --- /dev/null +++ b/parser/src/main/java/lv/enes/orang/parser/Parser.java @@ -0,0 +1,339 @@ +package lv.enes.orang.parser; + +import lv.enes.orang.ast.*; +import lv.enes.orang.ast.IfElseExpression; +import lv.enes.orang.ast.Statement; +import lv.enes.orang.lexer.Lexer; +import lv.enes.orang.lexer.Token; +import lv.enes.orang.utils.NonEmptyList; +import lv.enes.orang.utils.PeekableStream; + +import java.io.InputStream; +import java.io.Reader; +import java.util.ArrayList; +import 
java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.function.Predicate; + +public class Parser { + public static Program parseProgram(InputStream in) throws ParserException { + var parser = new Parser(in); + return parser.parseProgram(); + } + + public static Program parseProgram(Reader in) throws ParserException { + var parser = new Parser(in); + return parser.parseProgram(); + } + + public static Program parseProgram(String in) throws ParserException { + var parser = new Parser(in); + return parser.parseProgram(); + } + + private final PeekableStream input; + + public Parser(InputStream in) { + this(new Lexer(in)); + } + + public Parser(Reader in) { + this(new Lexer(in)); + } + + public Parser(String in) { + this(new Lexer(in)); + } + + public Parser(Iterator input) { + this.input = new PeekableStream<>(input); + } + + public Program parseProgram() throws ParserException { + var statements = new ArrayList(); + while (!maybeConsumeToken(Token.Type.EOF)) { + statements.add(parseStatement()); + maybeConsumeToken(Token.Type.SEMICOLON); + } + return new Program(Collections.unmodifiableList(statements)); + } + + private static boolean isBinaryOp(Token token) { + return switch (token.type()) { + case ASTERISK, SLASH, PLUS, MINUS, QUESTION_EQUAL, SLASH_EQUAL, GREATER, GREATER_EQUAL, LESS, LESS_EQUAL + -> true; + default -> false; + }; + } + + private static BinaryExpression.Operator toBinaryOp(Token token) { + return switch (token.type()) { + case ASTERISK -> BinaryExpression.Operator.MULTIPLY; + case SLASH -> BinaryExpression.Operator.DIVIDE; + case PLUS -> BinaryExpression.Operator.ADD; + case MINUS -> BinaryExpression.Operator.SUBTRACT; + case QUESTION_EQUAL -> BinaryExpression.Operator.EQUALS; + case SLASH_EQUAL -> BinaryExpression.Operator.NOT_EQUALS; + case GREATER -> BinaryExpression.Operator.GT; + case GREATER_EQUAL -> BinaryExpression.Operator.GTE; + case LESS -> BinaryExpression.Operator.LT; + case LESS_EQUAL -> 
BinaryExpression.Operator.LTE; + default -> throw new IllegalStateException(STR."Token \{token.type()} is not a binary operator"); + }; + } + + public static boolean isUnaryOp(Token token) { + return switch (token.type()) { + case PLUS, MINUS, BANG -> true; + default -> false; + }; + } + + public static UnaryExpression.Operator toUnaryOp(Token token) { + return switch (token.type()) { + case PLUS -> UnaryExpression.Operator.PLUS; + case MINUS -> UnaryExpression.Operator.NEGATE; + case BANG -> UnaryExpression.Operator.NOT; + default -> throw new IllegalStateException(STR."Token \{token.type()} is not a unary operator"); + }; + } + + private Token consume(Predicate pred, String msg) throws ParserException { + var tok = input.next(); + if (!pred.test(tok)) { + throw new ParserException(STR."\{msg}, got \{tok}"); + } + return tok; + } + + private Token consumeToken(Token.Type type) throws ParserException { + return consume(tok -> tok.type() == type, STR."Expected \{type}"); + } + + private boolean maybeConsumeToken(Token.Type type) { + if (input.peek().type() == type) { + input.next(); + return true; + } + return false; + } + + private ArrayExpression parseArray() throws ParserException { + consumeToken(Token.Type.BRACKET_LEFT); + if (maybeConsumeToken(Token.Type.BRACKET_RIGHT)) { + return new ArrayExpression(List.of()); + } + + var items = new ArrayList(); + do { + items.add(parseExpression()); + } while (maybeConsumeToken(Token.Type.COMMA)); + consumeToken(Token.Type.BRACKET_RIGHT); + + return new ArrayExpression(Collections.unmodifiableList(items)); + } + + private List parseArgSpecs() throws ParserException { + var argSpecs = new ArrayList(); + while (true) { + if (input.peek().type() == Token.Type.IDENTIFIER) { + argSpecs.add(ArgSpec.named(input.next().literal())); + } else if (input.peek().type() == Token.Type.PAREN_LEFT) { + consumeToken(Token.Type.PAREN_LEFT); + consumeToken(Token.Type.PAREN_RIGHT); + argSpecs.add(ArgSpec.nothing()); + } else { + break; + } + } 
+ return Collections.unmodifiableList(argSpecs); + } + + private Expression parseBinaryExpression() throws ParserException { + var lhs = parseCallExpression(); + if (!isBinaryOp(input.peek())) { + return lhs; + } + + return parseBinaryExpressionRhs(lhs, toBinaryOp(input.next())); + } + + private Expression parseBinaryExpressionRhs(Expression lhs, BinaryExpression.Operator op) throws ParserException { + var rhs = parseCallExpression(); + if (!isBinaryOp(input.peek())) { + return new BinaryExpression(op, lhs, rhs); + } + + var op2 = toBinaryOp(input.next()); + if (op2.bindsStrongerThan(op)) { + return new BinaryExpression(op, lhs, parseBinaryExpressionRhs(rhs, op2)); + } else { + return parseBinaryExpressionRhs(new BinaryExpression(op, lhs, rhs), op2); + } + } + + private BooleanLiteral parseBoolean() throws ParserException { + var t = consume(tok -> tok.type() == Token.Type.FALSE || tok.type() == Token.Type.TRUE, "Expected TRUE or FALSE"); + return new BooleanLiteral(t.type() == Token.Type.TRUE); + } + + private Expression parseCallExpression() throws ParserException { + var callee = parseSimpleExpression(); + while (couldStartSimpleExpression(input.peek().type())) { + var arg = parseSimpleExpression(); + callee = new CallExpression(callee, arg); + } + return callee; + } + + private Definition parseDefinition() throws ParserException { + consumeToken(Token.Type.DEF); + var defSpec = parseDefSpec(); + consumeToken(Token.Type.EQUAL); + var value = parseExpression(); + if (defSpec.args().isEmpty()) { + return new Definition(defSpec.name(), value); + } else { + return new Definition(defSpec.name(), new FnExpression(new NonEmptyList<>(defSpec.args()), value)); + } + } + + private DefSpec parseDefSpec() throws ParserException { + var name = consumeToken(Token.Type.IDENTIFIER).literal(); + var argSpecs = parseArgSpecs(); + return new DefSpec(name, argSpecs); + } + + private DoExpression parseDoExpression() throws ParserException { + consumeToken(Token.Type.DO); + var exprs 
= new ArrayList(); + do { + exprs.add(parseExpression()); + } while (maybeConsumeToken(Token.Type.SEMICOLON)); + consumeToken(Token.Type.END); + return new DoExpression(Collections.unmodifiableList(exprs)); + } + + private Expression parseExpression() throws ParserException { + if (isUnaryOp(input.peek())) { + return parseUnaryExpression(); + } + return parseBinaryExpression(); + } + + private FnExpression parseFnExpression() throws ParserException { + consumeToken(Token.Type.FN); + var argSpecs = parseArgSpecs(); + if (argSpecs.isEmpty()) { + throw new ParserException("Function definition with no arguments"); + } + var body = maybeConsumeToken(Token.Type.MINUS_GREATER) ? parseExpression() : parseDoExpression(); + return new FnExpression(new NonEmptyList<>(argSpecs), body); + } + + private IfElseExpression parseIfElseExpression() throws ParserException { + consumeToken(Token.Type.IF); + var cond = parseExpression(); + consumeToken(Token.Type.THEN); + var trueBranch = parseExpression(); + consumeToken(Token.Type.ELSE); + var falseBranch = parseExpression(); + return new IfElseExpression(cond, trueBranch, falseBranch); + } + + private IntLiteral parseInteger() throws ParserException { + var tok = consumeToken(Token.Type.INTEGER); + return new IntLiteral(Integer.parseInt(tok.literal())); + } + + private LetInExpression parseLetInExpression() throws ParserException { + consumeToken(Token.Type.LET); + var bindings = new ArrayList(); + do { + var defSpec = parseDefSpec(); + consumeToken(Token.Type.EQUAL); + var value = parseExpression(); + if (defSpec.args().isEmpty()) { + bindings.add(new LetInExpression.Binding(defSpec.name(), value)); + } else { + var fn = new FnExpression(new NonEmptyList<>(defSpec.args()), value); + bindings.add(new LetInExpression.Binding(defSpec.name(), fn)); + } + } while (maybeConsumeToken(Token.Type.AND)); + consumeToken(Token.Type.IN); + var body = parseExpression(); + return new LetInExpression(Collections.unmodifiableList(bindings), body); + 
} + + private Expression parseSimpleExpression() throws ParserException { + return switch (input.peek().type()) { + case PAREN_LEFT -> { + consumeToken(Token.Type.PAREN_LEFT); + if (maybeConsumeToken(Token.Type.PAREN_RIGHT)) { + yield VoidExpression.INSTANCE; + } + var expr = parseExpression(); + consumeToken(Token.Type.PAREN_RIGHT); + yield expr; + } + case TRUE, FALSE -> parseBoolean(); + case INTEGER -> parseInteger(); + case IDENTIFIER -> new VariableExpression(input.next().literal()); + case STRING -> parseString(); + case BRACKET_LEFT -> parseArray(); + case IF -> parseIfElseExpression(); + case LET -> parseLetInExpression(); + case FN -> parseFnExpression(); + case DO -> parseDoExpression(); + default -> throw new ParserException(STR."Unexpected token \{input.peek()}"); + }; + } + + private boolean couldStartSimpleExpression(Token.Type type) { + return switch (type) { + case PAREN_LEFT, TRUE, FALSE, INTEGER, IDENTIFIER, STRING, BRACKET_LEFT, IF, LET, FN, DO -> true; + default -> false; + }; + } + + private Statement parseStatement() throws ParserException { + if (input.peek().type() == Token.Type.DEF) { + return parseDefinition(); + } else { + return new ExpressionStatement(parseExpression()); + } + } + + private Expression parseString() throws ParserException { + var sb = new StringBuilder(); + var cps = input.next().literal().codePoints().iterator(); + while (cps.hasNext()) { + var cp = cps.next(); + if (cp == '\\') { + var escapeChar = cps.next(); + //noinspection UnnecessaryUnboxing + sb.append(switch (escapeChar.intValue()) { + case '\'' -> '\''; + case '"' -> '"'; + case 'r' -> '\r'; + case 'n' -> '\n'; + case 't' -> '\t'; + default -> throw new ParserException(STR."Unknown string escape '\\\{escapeChar}'"); + }); + } else { + sb.appendCodePoint(cp); + } + } + return new StringLiteral(sb.toString()); + } + + private Expression parseUnaryExpression() throws ParserException { + if (isUnaryOp(input.peek())) { + var op = toUnaryOp(input.next()); + return 
new UnaryExpression(op, parseUnaryExpression()); + } else { + return parseSimpleExpression(); + } + } +} diff --git a/parser/src/main/java/lv/enes/orang/parser/ParserException.java b/parser/src/main/java/lv/enes/orang/parser/ParserException.java new file mode 100644 index 0000000..632ce15 --- /dev/null +++ b/parser/src/main/java/lv/enes/orang/parser/ParserException.java @@ -0,0 +1,9 @@ +package lv.enes.orang.parser; + +import lv.enes.orang.core.OrangException; + +public class ParserException extends OrangException { + public ParserException(String message) { + super(message); + } +} diff --git a/parser/src/main/java/module-info.java b/parser/src/main/java/module-info.java new file mode 100644 index 0000000..43c2dc5 --- /dev/null +++ b/parser/src/main/java/module-info.java @@ -0,0 +1,8 @@ +module lv.enes.orang.parser { + exports lv.enes.orang.parser; + + requires lv.enes.orang.ast; + requires lv.enes.orang.core; + requires lv.enes.orang.lexer; + requires lv.enes.orang.utils; +} \ No newline at end of file diff --git a/settings.gradle.kts b/settings.gradle.kts index fc692fa..391d33b 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -1,3 +1,3 @@ rootProject.name = "orang" -include("ast", "core", "orang", "utils") \ No newline at end of file +include("ast", "core", "lexer", "orang", "parser", "utils") \ No newline at end of file diff --git a/utils/src/main/java/lv/enes/orang/utils/Codepoint.java b/utils/src/main/java/lv/enes/orang/utils/Codepoint.java new file mode 100644 index 0000000..a981c5e --- /dev/null +++ b/utils/src/main/java/lv/enes/orang/utils/Codepoint.java @@ -0,0 +1,8 @@ +package lv.enes.orang.utils; + +public record Codepoint(int cp) { + @Override + public String toString() { + return Character.toString(cp); + } +} diff --git a/utils/src/main/java/lv/enes/orang/utils/PeekableStream.java b/utils/src/main/java/lv/enes/orang/utils/PeekableStream.java new file mode 100644 index 0000000..7607a50 --- /dev/null +++ 
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Iterator;

/**
 * An {@link Iterator} wrapper that supports look-ahead and push-back.
 *
 * <p>Elements that were peeked at or explicitly put back are kept in an internal
 * stack and are returned, most recently pushed first, before any further element
 * is pulled from the wrapped iterator.
 *
 * <p>The push-back buffer is an {@link ArrayDeque}, which rejects {@code null};
 * consequently {@link #peek()} and {@link #putBack(Object)} throw
 * {@link NullPointerException} for {@code null} elements.
 *
 * @param <T> the element type produced by the stream
 */
public class PeekableStream<T> implements Iterator<T> {
    /** The wrapped source; consulted only when the push-back buffer is empty. */
    private final Iterator<T> input;

    /** Push-back buffer, used as a stack (last pushed is returned first). */
    private final Deque<T> buffer = new ArrayDeque<>();

    /**
     * Wraps the given iterator.
     *
     * @param input the source of elements
     */
    public PeekableStream(Iterator<T> input) {
        this.input = input;
    }

    /**
     * @return {@code true} if a pushed-back element is pending or the wrapped
     *         iterator has more elements
     */
    @Override
    public boolean hasNext() {
        return !buffer.isEmpty() || input.hasNext();
    }

    /**
     * Returns the most recently pushed-back element if there is one, otherwise
     * the next element of the wrapped iterator.
     *
     * @return the next element
     */
    @Override
    public T next() {
        if (!buffer.isEmpty()) {
            return buffer.pop();
        } else {
            return input.next();
        }
    }

    /**
     * Returns the next element without consuming it: the element is fetched via
     * {@link #next()} and immediately pushed back.
     *
     * @return the element that the following call to {@link #next()} will return
     */
    public T peek() {
        T value = next();
        putBack(value);
        return value;
    }

    /**
     * Pushes an element onto the front of the stream, so that it is the next one
     * returned by {@link #next()} or {@link #peek()}.
     *
     * @param value the element to push back
     */
    public void putBack(T value) {
        buffer.push(value);
    }
}