diff options
Diffstat (limited to 'parser')
| -rw-r--r-- | parser/build.gradle.kts | 22 | ||||
| -rw-r--r-- | parser/src/main/java/lv/enes/orang/parser/Parser.java | 339 | ||||
| -rw-r--r-- | parser/src/main/java/lv/enes/orang/parser/ParserException.java | 9 | ||||
| -rw-r--r-- | parser/src/main/java/module-info.java | 8 |
4 files changed, 378 insertions, 0 deletions
diff --git a/parser/build.gradle.kts b/parser/build.gradle.kts new file mode 100644 index 0000000..9934d9d --- /dev/null +++ b/parser/build.gradle.kts | |||
| @@ -0,0 +1,22 @@ | |||
// Gradle build script for the parser module.
plugins {
    java
}

// Sibling modules the parser sources import from (AST node types, the core
// exception base class, the lexer and shared utilities).
dependencies {
    implementation(project(":ast"))
    implementation(project(":core"))
    implementation(project(":lexer"))
    implementation(project(":utils"))
}

// Build with, and target, Java 22.
java {
    sourceCompatibility = JavaVersion.VERSION_22
    targetCompatibility = JavaVersion.VERSION_22
    toolchain {
        languageVersion = JavaLanguageVersion.of(22)
    }
}

// Parser.java uses string templates (STR."..."), which are still a preview
// language feature in Java 22, so every Java compilation needs this flag.
tasks.withType<JavaCompile> {
    options.compilerArgs.add("--enable-preview")
}
diff --git a/parser/src/main/java/lv/enes/orang/parser/Parser.java b/parser/src/main/java/lv/enes/orang/parser/Parser.java new file mode 100644 index 0000000..6c86e85 --- /dev/null +++ b/parser/src/main/java/lv/enes/orang/parser/Parser.java | |||
| @@ -0,0 +1,339 @@ | |||
| 1 | package lv.enes.orang.parser; | ||
| 2 | |||
| 3 | import lv.enes.orang.ast.*; | ||
| 4 | import lv.enes.orang.ast.IfElseExpression; | ||
| 5 | import lv.enes.orang.ast.Statement; | ||
| 6 | import lv.enes.orang.lexer.Lexer; | ||
| 7 | import lv.enes.orang.lexer.Token; | ||
| 8 | import lv.enes.orang.utils.NonEmptyList; | ||
| 9 | import lv.enes.orang.utils.PeekableStream; | ||
| 10 | |||
| 11 | import java.io.InputStream; | ||
| 12 | import java.io.Reader; | ||
| 13 | import java.util.ArrayList; | ||
| 14 | import java.util.Collections; | ||
| 15 | import java.util.Iterator; | ||
| 16 | import java.util.List; | ||
| 17 | import java.util.function.Predicate; | ||
| 18 | |||
| 19 | public class Parser { | ||
| 20 | public static Program parseProgram(InputStream in) throws ParserException { | ||
| 21 | var parser = new Parser(in); | ||
| 22 | return parser.parseProgram(); | ||
| 23 | } | ||
| 24 | |||
| 25 | public static Program parseProgram(Reader in) throws ParserException { | ||
| 26 | var parser = new Parser(in); | ||
| 27 | return parser.parseProgram(); | ||
| 28 | } | ||
| 29 | |||
| 30 | public static Program parseProgram(String in) throws ParserException { | ||
| 31 | var parser = new Parser(in); | ||
| 32 | return parser.parseProgram(); | ||
| 33 | } | ||
| 34 | |||
| 35 | private final PeekableStream<Token> input; | ||
| 36 | |||
| 37 | public Parser(InputStream in) { | ||
| 38 | this(new Lexer(in)); | ||
| 39 | } | ||
| 40 | |||
| 41 | public Parser(Reader in) { | ||
| 42 | this(new Lexer(in)); | ||
| 43 | } | ||
| 44 | |||
| 45 | public Parser(String in) { | ||
| 46 | this(new Lexer(in)); | ||
| 47 | } | ||
| 48 | |||
| 49 | public Parser(Iterator<Token> input) { | ||
| 50 | this.input = new PeekableStream<>(input); | ||
| 51 | } | ||
| 52 | |||
| 53 | public Program parseProgram() throws ParserException { | ||
| 54 | var statements = new ArrayList<Statement>(); | ||
| 55 | while (!maybeConsumeToken(Token.Type.EOF)) { | ||
| 56 | statements.add(parseStatement()); | ||
| 57 | maybeConsumeToken(Token.Type.SEMICOLON); | ||
| 58 | } | ||
| 59 | return new Program(Collections.unmodifiableList(statements)); | ||
| 60 | } | ||
| 61 | |||
| 62 | private static boolean isBinaryOp(Token token) { | ||
| 63 | return switch (token.type()) { | ||
| 64 | case ASTERISK, SLASH, PLUS, MINUS, QUESTION_EQUAL, SLASH_EQUAL, GREATER, GREATER_EQUAL, LESS, LESS_EQUAL | ||
| 65 | -> true; | ||
| 66 | default -> false; | ||
| 67 | }; | ||
| 68 | } | ||
| 69 | |||
| 70 | private static BinaryExpression.Operator toBinaryOp(Token token) { | ||
| 71 | return switch (token.type()) { | ||
| 72 | case ASTERISK -> BinaryExpression.Operator.MULTIPLY; | ||
| 73 | case SLASH -> BinaryExpression.Operator.DIVIDE; | ||
| 74 | case PLUS -> BinaryExpression.Operator.ADD; | ||
| 75 | case MINUS -> BinaryExpression.Operator.SUBTRACT; | ||
| 76 | case QUESTION_EQUAL -> BinaryExpression.Operator.EQUALS; | ||
| 77 | case SLASH_EQUAL -> BinaryExpression.Operator.NOT_EQUALS; | ||
| 78 | case GREATER -> BinaryExpression.Operator.GT; | ||
| 79 | case GREATER_EQUAL -> BinaryExpression.Operator.GTE; | ||
| 80 | case LESS -> BinaryExpression.Operator.LT; | ||
| 81 | case LESS_EQUAL -> BinaryExpression.Operator.LTE; | ||
| 82 | default -> throw new IllegalStateException(STR."Token \{token.type()} is not a binary operator"); | ||
| 83 | }; | ||
| 84 | } | ||
| 85 | |||
| 86 | public static boolean isUnaryOp(Token token) { | ||
| 87 | return switch (token.type()) { | ||
| 88 | case PLUS, MINUS, BANG -> true; | ||
| 89 | default -> false; | ||
| 90 | }; | ||
| 91 | } | ||
| 92 | |||
| 93 | public static UnaryExpression.Operator toUnaryOp(Token token) { | ||
| 94 | return switch (token.type()) { | ||
| 95 | case PLUS -> UnaryExpression.Operator.PLUS; | ||
| 96 | case MINUS -> UnaryExpression.Operator.NEGATE; | ||
| 97 | case BANG -> UnaryExpression.Operator.NOT; | ||
| 98 | default -> throw new IllegalStateException(STR."Token \{token.type()} is not a unary operator"); | ||
| 99 | }; | ||
| 100 | } | ||
| 101 | |||
| 102 | private Token consume(Predicate<Token> pred, String msg) throws ParserException { | ||
| 103 | var tok = input.next(); | ||
| 104 | if (!pred.test(tok)) { | ||
| 105 | throw new ParserException(STR."\{msg}, got \{tok}"); | ||
| 106 | } | ||
| 107 | return tok; | ||
| 108 | } | ||
| 109 | |||
| 110 | private Token consumeToken(Token.Type type) throws ParserException { | ||
| 111 | return consume(tok -> tok.type() == type, STR."Expected \{type}"); | ||
| 112 | } | ||
| 113 | |||
| 114 | private boolean maybeConsumeToken(Token.Type type) { | ||
| 115 | if (input.peek().type() == type) { | ||
| 116 | input.next(); | ||
| 117 | return true; | ||
| 118 | } | ||
| 119 | return false; | ||
| 120 | } | ||
| 121 | |||
| 122 | private ArrayExpression parseArray() throws ParserException { | ||
| 123 | consumeToken(Token.Type.BRACKET_LEFT); | ||
| 124 | if (maybeConsumeToken(Token.Type.BRACKET_RIGHT)) { | ||
| 125 | return new ArrayExpression(List.of()); | ||
| 126 | } | ||
| 127 | |||
| 128 | var items = new ArrayList<Expression>(); | ||
| 129 | do { | ||
| 130 | items.add(parseExpression()); | ||
| 131 | } while (maybeConsumeToken(Token.Type.COMMA)); | ||
| 132 | consumeToken(Token.Type.BRACKET_RIGHT); | ||
| 133 | |||
| 134 | return new ArrayExpression(Collections.unmodifiableList(items)); | ||
| 135 | } | ||
| 136 | |||
| 137 | private List<ArgSpec> parseArgSpecs() throws ParserException { | ||
| 138 | var argSpecs = new ArrayList<ArgSpec>(); | ||
| 139 | while (true) { | ||
| 140 | if (input.peek().type() == Token.Type.IDENTIFIER) { | ||
| 141 | argSpecs.add(ArgSpec.named(input.next().literal())); | ||
| 142 | } else if (input.peek().type() == Token.Type.PAREN_LEFT) { | ||
| 143 | consumeToken(Token.Type.PAREN_LEFT); | ||
| 144 | consumeToken(Token.Type.PAREN_RIGHT); | ||
| 145 | argSpecs.add(ArgSpec.nothing()); | ||
| 146 | } else { | ||
| 147 | break; | ||
| 148 | } | ||
| 149 | } | ||
| 150 | return Collections.unmodifiableList(argSpecs); | ||
| 151 | } | ||
| 152 | |||
| 153 | private Expression parseBinaryExpression() throws ParserException { | ||
| 154 | var lhs = parseCallExpression(); | ||
| 155 | if (!isBinaryOp(input.peek())) { | ||
| 156 | return lhs; | ||
| 157 | } | ||
| 158 | |||
| 159 | return parseBinaryExpressionRhs(lhs, toBinaryOp(input.next())); | ||
| 160 | } | ||
| 161 | |||
| 162 | private Expression parseBinaryExpressionRhs(Expression lhs, BinaryExpression.Operator op) throws ParserException { | ||
| 163 | var rhs = parseCallExpression(); | ||
| 164 | if (!isBinaryOp(input.peek())) { | ||
| 165 | return new BinaryExpression(op, lhs, rhs); | ||
| 166 | } | ||
| 167 | |||
| 168 | var op2 = toBinaryOp(input.next()); | ||
| 169 | if (op2.bindsStrongerThan(op)) { | ||
| 170 | return new BinaryExpression(op, lhs, parseBinaryExpressionRhs(rhs, op2)); | ||
| 171 | } else { | ||
| 172 | return parseBinaryExpressionRhs(new BinaryExpression(op, lhs, rhs), op2); | ||
| 173 | } | ||
| 174 | } | ||
| 175 | |||
| 176 | private BooleanLiteral parseBoolean() throws ParserException { | ||
| 177 | var t = consume(tok -> tok.type() == Token.Type.FALSE || tok.type() == Token.Type.TRUE, "Expected TRUE or FALSE"); | ||
| 178 | return new BooleanLiteral(t.type() == Token.Type.TRUE); | ||
| 179 | } | ||
| 180 | |||
| 181 | private Expression parseCallExpression() throws ParserException { | ||
| 182 | var callee = parseSimpleExpression(); | ||
| 183 | while (couldStartSimpleExpression(input.peek().type())) { | ||
| 184 | var arg = parseSimpleExpression(); | ||
| 185 | callee = new CallExpression(callee, arg); | ||
| 186 | } | ||
| 187 | return callee; | ||
| 188 | } | ||
| 189 | |||
| 190 | private Definition parseDefinition() throws ParserException { | ||
| 191 | consumeToken(Token.Type.DEF); | ||
| 192 | var defSpec = parseDefSpec(); | ||
| 193 | consumeToken(Token.Type.EQUAL); | ||
| 194 | var value = parseExpression(); | ||
| 195 | if (defSpec.args().isEmpty()) { | ||
| 196 | return new Definition(defSpec.name(), value); | ||
| 197 | } else { | ||
| 198 | return new Definition(defSpec.name(), new FnExpression(new NonEmptyList<>(defSpec.args()), value)); | ||
| 199 | } | ||
| 200 | } | ||
| 201 | |||
| 202 | private DefSpec parseDefSpec() throws ParserException { | ||
| 203 | var name = consumeToken(Token.Type.IDENTIFIER).literal(); | ||
| 204 | var argSpecs = parseArgSpecs(); | ||
| 205 | return new DefSpec(name, argSpecs); | ||
| 206 | } | ||
| 207 | |||
| 208 | private DoExpression parseDoExpression() throws ParserException { | ||
| 209 | consumeToken(Token.Type.DO); | ||
| 210 | var exprs = new ArrayList<Expression>(); | ||
| 211 | do { | ||
| 212 | exprs.add(parseExpression()); | ||
| 213 | } while (maybeConsumeToken(Token.Type.SEMICOLON)); | ||
| 214 | consumeToken(Token.Type.END); | ||
| 215 | return new DoExpression(Collections.unmodifiableList(exprs)); | ||
| 216 | } | ||
| 217 | |||
| 218 | private Expression parseExpression() throws ParserException { | ||
| 219 | if (isUnaryOp(input.peek())) { | ||
| 220 | return parseUnaryExpression(); | ||
| 221 | } | ||
| 222 | return parseBinaryExpression(); | ||
| 223 | } | ||
| 224 | |||
| 225 | private FnExpression parseFnExpression() throws ParserException { | ||
| 226 | consumeToken(Token.Type.FN); | ||
| 227 | var argSpecs = parseArgSpecs(); | ||
| 228 | if (argSpecs.isEmpty()) { | ||
| 229 | throw new ParserException("Function definition with no arguments"); | ||
| 230 | } | ||
| 231 | var body = maybeConsumeToken(Token.Type.MINUS_GREATER) ? parseExpression() : parseDoExpression(); | ||
| 232 | return new FnExpression(new NonEmptyList<>(argSpecs), body); | ||
| 233 | } | ||
| 234 | |||
| 235 | private IfElseExpression parseIfElseExpression() throws ParserException { | ||
| 236 | consumeToken(Token.Type.IF); | ||
| 237 | var cond = parseExpression(); | ||
| 238 | consumeToken(Token.Type.THEN); | ||
| 239 | var trueBranch = parseExpression(); | ||
| 240 | consumeToken(Token.Type.ELSE); | ||
| 241 | var falseBranch = parseExpression(); | ||
| 242 | return new IfElseExpression(cond, trueBranch, falseBranch); | ||
| 243 | } | ||
| 244 | |||
| 245 | private IntLiteral parseInteger() throws ParserException { | ||
| 246 | var tok = consumeToken(Token.Type.INTEGER); | ||
| 247 | return new IntLiteral(Integer.parseInt(tok.literal())); | ||
| 248 | } | ||
| 249 | |||
| 250 | private LetInExpression parseLetInExpression() throws ParserException { | ||
| 251 | consumeToken(Token.Type.LET); | ||
| 252 | var bindings = new ArrayList<LetInExpression.Binding>(); | ||
| 253 | do { | ||
| 254 | var defSpec = parseDefSpec(); | ||
| 255 | consumeToken(Token.Type.EQUAL); | ||
| 256 | var value = parseExpression(); | ||
| 257 | if (defSpec.args().isEmpty()) { | ||
| 258 | bindings.add(new LetInExpression.Binding(defSpec.name(), value)); | ||
| 259 | } else { | ||
| 260 | var fn = new FnExpression(new NonEmptyList<>(defSpec.args()), value); | ||
| 261 | bindings.add(new LetInExpression.Binding(defSpec.name(), fn)); | ||
| 262 | } | ||
| 263 | } while (maybeConsumeToken(Token.Type.AND)); | ||
| 264 | consumeToken(Token.Type.IN); | ||
| 265 | var body = parseExpression(); | ||
| 266 | return new LetInExpression(Collections.unmodifiableList(bindings), body); | ||
| 267 | } | ||
| 268 | |||
| 269 | private Expression parseSimpleExpression() throws ParserException { | ||
| 270 | return switch (input.peek().type()) { | ||
| 271 | case PAREN_LEFT -> { | ||
| 272 | consumeToken(Token.Type.PAREN_LEFT); | ||
| 273 | if (maybeConsumeToken(Token.Type.PAREN_RIGHT)) { | ||
| 274 | yield VoidExpression.INSTANCE; | ||
| 275 | } | ||
| 276 | var expr = parseExpression(); | ||
| 277 | consumeToken(Token.Type.PAREN_RIGHT); | ||
| 278 | yield expr; | ||
| 279 | } | ||
| 280 | case TRUE, FALSE -> parseBoolean(); | ||
| 281 | case INTEGER -> parseInteger(); | ||
| 282 | case IDENTIFIER -> new VariableExpression(input.next().literal()); | ||
| 283 | case STRING -> parseString(); | ||
| 284 | case BRACKET_LEFT -> parseArray(); | ||
| 285 | case IF -> parseIfElseExpression(); | ||
| 286 | case LET -> parseLetInExpression(); | ||
| 287 | case FN -> parseFnExpression(); | ||
| 288 | case DO -> parseDoExpression(); | ||
| 289 | default -> throw new ParserException(STR."Unexpected token \{input.peek()}"); | ||
| 290 | }; | ||
| 291 | } | ||
| 292 | |||
| 293 | private boolean couldStartSimpleExpression(Token.Type type) { | ||
| 294 | return switch (type) { | ||
| 295 | case PAREN_LEFT, TRUE, FALSE, INTEGER, IDENTIFIER, STRING, BRACKET_LEFT, IF, LET, FN, DO -> true; | ||
| 296 | default -> false; | ||
| 297 | }; | ||
| 298 | } | ||
| 299 | |||
| 300 | private Statement parseStatement() throws ParserException { | ||
| 301 | if (input.peek().type() == Token.Type.DEF) { | ||
| 302 | return parseDefinition(); | ||
| 303 | } else { | ||
| 304 | return new ExpressionStatement(parseExpression()); | ||
| 305 | } | ||
| 306 | } | ||
| 307 | |||
| 308 | private Expression parseString() throws ParserException { | ||
| 309 | var sb = new StringBuilder(); | ||
| 310 | var cps = input.next().literal().codePoints().iterator(); | ||
| 311 | while (cps.hasNext()) { | ||
| 312 | var cp = cps.next(); | ||
| 313 | if (cp == '\\') { | ||
| 314 | var escapeChar = cps.next(); | ||
| 315 | //noinspection UnnecessaryUnboxing | ||
| 316 | sb.append(switch (escapeChar.intValue()) { | ||
| 317 | case '\'' -> '\''; | ||
| 318 | case '"' -> '"'; | ||
| 319 | case 'r' -> '\r'; | ||
| 320 | case 'n' -> '\n'; | ||
| 321 | case 't' -> '\t'; | ||
| 322 | default -> throw new ParserException(STR."Unknown string escape '\\\{escapeChar}'"); | ||
| 323 | }); | ||
| 324 | } else { | ||
| 325 | sb.appendCodePoint(cp); | ||
| 326 | } | ||
| 327 | } | ||
| 328 | return new StringLiteral(sb.toString()); | ||
| 329 | } | ||
| 330 | |||
| 331 | private Expression parseUnaryExpression() throws ParserException { | ||
| 332 | if (isUnaryOp(input.peek())) { | ||
| 333 | var op = toUnaryOp(input.next()); | ||
| 334 | return new UnaryExpression(op, parseUnaryExpression()); | ||
| 335 | } else { | ||
| 336 | return parseSimpleExpression(); | ||
| 337 | } | ||
| 338 | } | ||
| 339 | } | ||
diff --git a/parser/src/main/java/lv/enes/orang/parser/ParserException.java b/parser/src/main/java/lv/enes/orang/parser/ParserException.java new file mode 100644 index 0000000..632ce15 --- /dev/null +++ b/parser/src/main/java/lv/enes/orang/parser/ParserException.java | |||
| @@ -0,0 +1,9 @@ | |||
| 1 | package lv.enes.orang.parser; | ||
| 2 | |||
| 3 | import lv.enes.orang.core.OrangException; | ||
| 4 | |||
| 5 | public class ParserException extends OrangException { | ||
| 6 | public ParserException(String message) { | ||
| 7 | super(message); | ||
| 8 | } | ||
| 9 | } | ||
diff --git a/parser/src/main/java/module-info.java b/parser/src/main/java/module-info.java new file mode 100644 index 0000000..43c2dc5 --- /dev/null +++ b/parser/src/main/java/module-info.java | |||
| @@ -0,0 +1,8 @@ | |||
| 1 | module lv.enes.orang.parser { | ||
| 2 | exports lv.enes.orang.parser; | ||
| 3 | |||
| 4 | requires lv.enes.orang.ast; | ||
| 5 | requires lv.enes.orang.core; | ||
| 6 | requires lv.enes.orang.lexer; | ||
| 7 | requires lv.enes.orang.utils; | ||
| 8 | } \ No newline at end of file | ||