summary | refs | log | tree | commit | diff
path: root/parser/src/main/java
diff options
context:
space:
mode:
author: Uko Kokņevičs, 2024-08-18 23:29:37 +0800
committer: Uko Kokņevičs, 2024-08-18 23:29:37 +0800
commit: c7116f9bd0471f8638b888472426e383f64cbcdc (patch)
tree: 7ff2679d4b526338fad0b317db379ee76fd6f5fc /parser/src/main/java
parent: Added Nothing/empty-tuple arguments (diff)
download: orang-c7116f9bd0471f8638b888472426e383f64cbcdc.tar.gz
orang-c7116f9bd0471f8638b888472426e383f64cbcdc.tar.xz
orang-c7116f9bd0471f8638b888472426e383f64cbcdc.zip
Some more modularisation
Diffstat (limited to 'parser/src/main/java')
-rw-r--r-- parser/src/main/java/lv/enes/orang/parser/Parser.java | 339
-rw-r--r-- parser/src/main/java/lv/enes/orang/parser/ParserException.java | 9
-rw-r--r-- parser/src/main/java/module-info.java | 8
3 files changed, 356 insertions, 0 deletions
diff --git a/parser/src/main/java/lv/enes/orang/parser/Parser.java b/parser/src/main/java/lv/enes/orang/parser/Parser.java
new file mode 100644
index 0000000..6c86e85
--- /dev/null
+++ b/parser/src/main/java/lv/enes/orang/parser/Parser.java
@@ -0,0 +1,339 @@
1package lv.enes.orang.parser;
2
3import lv.enes.orang.ast.*;
4import lv.enes.orang.ast.IfElseExpression;
5import lv.enes.orang.ast.Statement;
6import lv.enes.orang.lexer.Lexer;
7import lv.enes.orang.lexer.Token;
8import lv.enes.orang.utils.NonEmptyList;
9import lv.enes.orang.utils.PeekableStream;
10
11import java.io.InputStream;
12import java.io.Reader;
13import java.util.ArrayList;
14import java.util.Collections;
15import java.util.Iterator;
16import java.util.List;
17import java.util.function.Predicate;
18
19public class Parser {
20 public static Program parseProgram(InputStream in) throws ParserException {
21 var parser = new Parser(in);
22 return parser.parseProgram();
23 }
24
25 public static Program parseProgram(Reader in) throws ParserException {
26 var parser = new Parser(in);
27 return parser.parseProgram();
28 }
29
30 public static Program parseProgram(String in) throws ParserException {
31 var parser = new Parser(in);
32 return parser.parseProgram();
33 }
34
35 private final PeekableStream<Token> input;
36
37 public Parser(InputStream in) {
38 this(new Lexer(in));
39 }
40
41 public Parser(Reader in) {
42 this(new Lexer(in));
43 }
44
45 public Parser(String in) {
46 this(new Lexer(in));
47 }
48
49 public Parser(Iterator<Token> input) {
50 this.input = new PeekableStream<>(input);
51 }
52
53 public Program parseProgram() throws ParserException {
54 var statements = new ArrayList<Statement>();
55 while (!maybeConsumeToken(Token.Type.EOF)) {
56 statements.add(parseStatement());
57 maybeConsumeToken(Token.Type.SEMICOLON);
58 }
59 return new Program(Collections.unmodifiableList(statements));
60 }
61
62 private static boolean isBinaryOp(Token token) {
63 return switch (token.type()) {
64 case ASTERISK, SLASH, PLUS, MINUS, QUESTION_EQUAL, SLASH_EQUAL, GREATER, GREATER_EQUAL, LESS, LESS_EQUAL
65 -> true;
66 default -> false;
67 };
68 }
69
70 private static BinaryExpression.Operator toBinaryOp(Token token) {
71 return switch (token.type()) {
72 case ASTERISK -> BinaryExpression.Operator.MULTIPLY;
73 case SLASH -> BinaryExpression.Operator.DIVIDE;
74 case PLUS -> BinaryExpression.Operator.ADD;
75 case MINUS -> BinaryExpression.Operator.SUBTRACT;
76 case QUESTION_EQUAL -> BinaryExpression.Operator.EQUALS;
77 case SLASH_EQUAL -> BinaryExpression.Operator.NOT_EQUALS;
78 case GREATER -> BinaryExpression.Operator.GT;
79 case GREATER_EQUAL -> BinaryExpression.Operator.GTE;
80 case LESS -> BinaryExpression.Operator.LT;
81 case LESS_EQUAL -> BinaryExpression.Operator.LTE;
82 default -> throw new IllegalStateException(STR."Token \{token.type()} is not a binary operator");
83 };
84 }
85
86 public static boolean isUnaryOp(Token token) {
87 return switch (token.type()) {
88 case PLUS, MINUS, BANG -> true;
89 default -> false;
90 };
91 }
92
93 public static UnaryExpression.Operator toUnaryOp(Token token) {
94 return switch (token.type()) {
95 case PLUS -> UnaryExpression.Operator.PLUS;
96 case MINUS -> UnaryExpression.Operator.NEGATE;
97 case BANG -> UnaryExpression.Operator.NOT;
98 default -> throw new IllegalStateException(STR."Token \{token.type()} is not a unary operator");
99 };
100 }
101
102 private Token consume(Predicate<Token> pred, String msg) throws ParserException {
103 var tok = input.next();
104 if (!pred.test(tok)) {
105 throw new ParserException(STR."\{msg}, got \{tok}");
106 }
107 return tok;
108 }
109
110 private Token consumeToken(Token.Type type) throws ParserException {
111 return consume(tok -> tok.type() == type, STR."Expected \{type}");
112 }
113
114 private boolean maybeConsumeToken(Token.Type type) {
115 if (input.peek().type() == type) {
116 input.next();
117 return true;
118 }
119 return false;
120 }
121
122 private ArrayExpression parseArray() throws ParserException {
123 consumeToken(Token.Type.BRACKET_LEFT);
124 if (maybeConsumeToken(Token.Type.BRACKET_RIGHT)) {
125 return new ArrayExpression(List.of());
126 }
127
128 var items = new ArrayList<Expression>();
129 do {
130 items.add(parseExpression());
131 } while (maybeConsumeToken(Token.Type.COMMA));
132 consumeToken(Token.Type.BRACKET_RIGHT);
133
134 return new ArrayExpression(Collections.unmodifiableList(items));
135 }
136
137 private List<ArgSpec> parseArgSpecs() throws ParserException {
138 var argSpecs = new ArrayList<ArgSpec>();
139 while (true) {
140 if (input.peek().type() == Token.Type.IDENTIFIER) {
141 argSpecs.add(ArgSpec.named(input.next().literal()));
142 } else if (input.peek().type() == Token.Type.PAREN_LEFT) {
143 consumeToken(Token.Type.PAREN_LEFT);
144 consumeToken(Token.Type.PAREN_RIGHT);
145 argSpecs.add(ArgSpec.nothing());
146 } else {
147 break;
148 }
149 }
150 return Collections.unmodifiableList(argSpecs);
151 }
152
153 private Expression parseBinaryExpression() throws ParserException {
154 var lhs = parseCallExpression();
155 if (!isBinaryOp(input.peek())) {
156 return lhs;
157 }
158
159 return parseBinaryExpressionRhs(lhs, toBinaryOp(input.next()));
160 }
161
162 private Expression parseBinaryExpressionRhs(Expression lhs, BinaryExpression.Operator op) throws ParserException {
163 var rhs = parseCallExpression();
164 if (!isBinaryOp(input.peek())) {
165 return new BinaryExpression(op, lhs, rhs);
166 }
167
168 var op2 = toBinaryOp(input.next());
169 if (op2.bindsStrongerThan(op)) {
170 return new BinaryExpression(op, lhs, parseBinaryExpressionRhs(rhs, op2));
171 } else {
172 return parseBinaryExpressionRhs(new BinaryExpression(op, lhs, rhs), op2);
173 }
174 }
175
176 private BooleanLiteral parseBoolean() throws ParserException {
177 var t = consume(tok -> tok.type() == Token.Type.FALSE || tok.type() == Token.Type.TRUE, "Expected TRUE or FALSE");
178 return new BooleanLiteral(t.type() == Token.Type.TRUE);
179 }
180
181 private Expression parseCallExpression() throws ParserException {
182 var callee = parseSimpleExpression();
183 while (couldStartSimpleExpression(input.peek().type())) {
184 var arg = parseSimpleExpression();
185 callee = new CallExpression(callee, arg);
186 }
187 return callee;
188 }
189
190 private Definition parseDefinition() throws ParserException {
191 consumeToken(Token.Type.DEF);
192 var defSpec = parseDefSpec();
193 consumeToken(Token.Type.EQUAL);
194 var value = parseExpression();
195 if (defSpec.args().isEmpty()) {
196 return new Definition(defSpec.name(), value);
197 } else {
198 return new Definition(defSpec.name(), new FnExpression(new NonEmptyList<>(defSpec.args()), value));
199 }
200 }
201
202 private DefSpec parseDefSpec() throws ParserException {
203 var name = consumeToken(Token.Type.IDENTIFIER).literal();
204 var argSpecs = parseArgSpecs();
205 return new DefSpec(name, argSpecs);
206 }
207
208 private DoExpression parseDoExpression() throws ParserException {
209 consumeToken(Token.Type.DO);
210 var exprs = new ArrayList<Expression>();
211 do {
212 exprs.add(parseExpression());
213 } while (maybeConsumeToken(Token.Type.SEMICOLON));
214 consumeToken(Token.Type.END);
215 return new DoExpression(Collections.unmodifiableList(exprs));
216 }
217
218 private Expression parseExpression() throws ParserException {
219 if (isUnaryOp(input.peek())) {
220 return parseUnaryExpression();
221 }
222 return parseBinaryExpression();
223 }
224
225 private FnExpression parseFnExpression() throws ParserException {
226 consumeToken(Token.Type.FN);
227 var argSpecs = parseArgSpecs();
228 if (argSpecs.isEmpty()) {
229 throw new ParserException("Function definition with no arguments");
230 }
231 var body = maybeConsumeToken(Token.Type.MINUS_GREATER) ? parseExpression() : parseDoExpression();
232 return new FnExpression(new NonEmptyList<>(argSpecs), body);
233 }
234
235 private IfElseExpression parseIfElseExpression() throws ParserException {
236 consumeToken(Token.Type.IF);
237 var cond = parseExpression();
238 consumeToken(Token.Type.THEN);
239 var trueBranch = parseExpression();
240 consumeToken(Token.Type.ELSE);
241 var falseBranch = parseExpression();
242 return new IfElseExpression(cond, trueBranch, falseBranch);
243 }
244
245 private IntLiteral parseInteger() throws ParserException {
246 var tok = consumeToken(Token.Type.INTEGER);
247 return new IntLiteral(Integer.parseInt(tok.literal()));
248 }
249
250 private LetInExpression parseLetInExpression() throws ParserException {
251 consumeToken(Token.Type.LET);
252 var bindings = new ArrayList<LetInExpression.Binding>();
253 do {
254 var defSpec = parseDefSpec();
255 consumeToken(Token.Type.EQUAL);
256 var value = parseExpression();
257 if (defSpec.args().isEmpty()) {
258 bindings.add(new LetInExpression.Binding(defSpec.name(), value));
259 } else {
260 var fn = new FnExpression(new NonEmptyList<>(defSpec.args()), value);
261 bindings.add(new LetInExpression.Binding(defSpec.name(), fn));
262 }
263 } while (maybeConsumeToken(Token.Type.AND));
264 consumeToken(Token.Type.IN);
265 var body = parseExpression();
266 return new LetInExpression(Collections.unmodifiableList(bindings), body);
267 }
268
269 private Expression parseSimpleExpression() throws ParserException {
270 return switch (input.peek().type()) {
271 case PAREN_LEFT -> {
272 consumeToken(Token.Type.PAREN_LEFT);
273 if (maybeConsumeToken(Token.Type.PAREN_RIGHT)) {
274 yield VoidExpression.INSTANCE;
275 }
276 var expr = parseExpression();
277 consumeToken(Token.Type.PAREN_RIGHT);
278 yield expr;
279 }
280 case TRUE, FALSE -> parseBoolean();
281 case INTEGER -> parseInteger();
282 case IDENTIFIER -> new VariableExpression(input.next().literal());
283 case STRING -> parseString();
284 case BRACKET_LEFT -> parseArray();
285 case IF -> parseIfElseExpression();
286 case LET -> parseLetInExpression();
287 case FN -> parseFnExpression();
288 case DO -> parseDoExpression();
289 default -> throw new ParserException(STR."Unexpected token \{input.peek()}");
290 };
291 }
292
293 private boolean couldStartSimpleExpression(Token.Type type) {
294 return switch (type) {
295 case PAREN_LEFT, TRUE, FALSE, INTEGER, IDENTIFIER, STRING, BRACKET_LEFT, IF, LET, FN, DO -> true;
296 default -> false;
297 };
298 }
299
300 private Statement parseStatement() throws ParserException {
301 if (input.peek().type() == Token.Type.DEF) {
302 return parseDefinition();
303 } else {
304 return new ExpressionStatement(parseExpression());
305 }
306 }
307
308 private Expression parseString() throws ParserException {
309 var sb = new StringBuilder();
310 var cps = input.next().literal().codePoints().iterator();
311 while (cps.hasNext()) {
312 var cp = cps.next();
313 if (cp == '\\') {
314 var escapeChar = cps.next();
315 //noinspection UnnecessaryUnboxing
316 sb.append(switch (escapeChar.intValue()) {
317 case '\'' -> '\'';
318 case '"' -> '"';
319 case 'r' -> '\r';
320 case 'n' -> '\n';
321 case 't' -> '\t';
322 default -> throw new ParserException(STR."Unknown string escape '\\\{escapeChar}'");
323 });
324 } else {
325 sb.appendCodePoint(cp);
326 }
327 }
328 return new StringLiteral(sb.toString());
329 }
330
331 private Expression parseUnaryExpression() throws ParserException {
332 if (isUnaryOp(input.peek())) {
333 var op = toUnaryOp(input.next());
334 return new UnaryExpression(op, parseUnaryExpression());
335 } else {
336 return parseSimpleExpression();
337 }
338 }
339}
diff --git a/parser/src/main/java/lv/enes/orang/parser/ParserException.java b/parser/src/main/java/lv/enes/orang/parser/ParserException.java
new file mode 100644
index 0000000..632ce15
--- /dev/null
+++ b/parser/src/main/java/lv/enes/orang/parser/ParserException.java
@@ -0,0 +1,9 @@
1package lv.enes.orang.parser;
2
3import lv.enes.orang.core.OrangException;
4
5public class ParserException extends OrangException {
6 public ParserException(String message) {
7 super(message);
8 }
9}
diff --git a/parser/src/main/java/module-info.java b/parser/src/main/java/module-info.java
new file mode 100644
index 0000000..43c2dc5
--- /dev/null
+++ b/parser/src/main/java/module-info.java
@@ -0,0 +1,8 @@
1module lv.enes.orang.parser {
2 exports lv.enes.orang.parser;
3
4 requires lv.enes.orang.ast;
5 requires lv.enes.orang.core;
6 requires lv.enes.orang.lexer;
7 requires lv.enes.orang.utils;
8} \ No newline at end of file