From a50265860470ab53ee137536d738cdc57ff47e72 Mon Sep 17 00:00:00 2001
From: Tom Willemse
Date: Wed, 21 Oct 2020 23:42:40 -0700
Subject: [PATCH] Add Scanner for Lox

---
Review notes (not part of the commit message):
- The blob ids on the "index" lines are those of the original commit and no
  longer match the revised file contents.

 src/CMakeLists.txt                            |  16 ++
 src/com/craftinginterpreters/lox/Lox.java     |  79 ++++++
 src/com/craftinginterpreters/lox/Scanner.java | 225 ++++++++++++++++++
 src/com/craftinginterpreters/lox/Token.java   |  21 ++
 .../craftinginterpreters/lox/TokenType.java   |  21 ++
 5 files changed, 362 insertions(+)
 create mode 100644 src/CMakeLists.txt
 create mode 100644 src/com/craftinginterpreters/lox/Lox.java
 create mode 100644 src/com/craftinginterpreters/lox/Scanner.java
 create mode 100644 src/com/craftinginterpreters/lox/Token.java
 create mode 100644 src/com/craftinginterpreters/lox/TokenType.java

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..f664d96
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,16 @@
+cmake_minimum_required(VERSION 2.8)
+
+find_package(Java REQUIRED)
+include(UseJava)
+
+project(Lox NONE)
+
+add_jar(Lox
+  com/craftinginterpreters/lox/Lox.java
+  com/craftinginterpreters/lox/Scanner.java
+  com/craftinginterpreters/lox/TokenType.java
+  com/craftinginterpreters/lox/Token.java
+  ENTRY_POINT com/craftinginterpreters/lox/Lox)
+
+get_target_property(_jarFile Lox JAR_FILE)
+get_target_property(_classDir Lox CLASSDIR)
diff --git a/src/com/craftinginterpreters/lox/Lox.java b/src/com/craftinginterpreters/lox/Lox.java
new file mode 100644
index 0000000..51d72c7
--- /dev/null
+++ b/src/com/craftinginterpreters/lox/Lox.java
@@ -0,0 +1,79 @@
+package com.craftinginterpreters.lox;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.List;
+
+/**
+ * Command-line entry point for the Lox interpreter: runs a script file or an
+ * interactive prompt and tracks whether an error was reported.
+ */
+public class Lox {
+  // Set by report(); checked after a script run and cleared between prompt lines.
+  private static boolean hadError = false;
+
+  /**
+   * Runs the script named by the single argument, or an interactive prompt
+   * when no argument is given. Exits with status 64 on incorrect usage.
+   */
+  public static void main(String[] args) throws IOException {
+    if (args.length > 1) {
+      System.out.println("Usage: jlox [script]");
+      System.exit(64);
+    } else if (args.length == 1) {
+      runFile(args[0]);
+    } else {
+      runPrompt();
+    }
+  }
+
+  /** Reads the whole file at {@code path} and runs it; exits with 65 on error. */
+  private static void runFile(String path) throws IOException {
+    byte[] bytes = Files.readAllBytes(Paths.get(path));
+    run(new String(bytes, Charset.defaultCharset()));
+
+    // Indicate an error in the exit code.
+    if (hadError) System.exit(65);
+  }
+
+  /** Reads and runs one line at a time from standard input until end of input. */
+  private static void runPrompt() throws IOException {
+    InputStreamReader input = new InputStreamReader(System.in);
+    BufferedReader reader = new BufferedReader(input);
+
+    for (;;) {
+      System.out.print("> ");
+      String line = reader.readLine();
+      if (line == null) break;
+      run(line);
+      // Reset so an error on one line is not carried over to the next.
+      hadError = false;
+    }
+  }
+
+  /** Scans {@code source} into tokens and prints each one. */
+  private static void run(String source) {
+    Scanner scanner = new Scanner(source);
+    List<Token> tokens = scanner.scanTokens();
+
+    // For now, just print the tokens.
+    for (Token token : tokens) {
+      System.out.println(token);
+    }
+  }
+
+  /** Reports {@code message} as an error on the given source line. */
+  public static void error(int line, String message) {
+    report(line, "", message);
+  }
+
+  /** Prints the error to standard error and records that an error occurred. */
+  private static void report(int line, String where, String message) {
+    System.err.println("[line " + line + "] Error" + where + ": " + message);
+    hadError = true;
+  }
+}
diff --git a/src/com/craftinginterpreters/lox/Scanner.java b/src/com/craftinginterpreters/lox/Scanner.java
new file mode 100644
index 0000000..e21f701
--- /dev/null
+++ b/src/com/craftinginterpreters/lox/Scanner.java
@@ -0,0 +1,225 @@
+package com.craftinginterpreters.lox;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static com.craftinginterpreters.lox.TokenType.*;
+
+/**
+ * Converts Lox source text into a list of tokens. Lexical errors are reported
+ * through {@link Lox#error(int, String)} and scanning continues.
+ */
+class Scanner {
+  private final String source;
+  private final List<Token> tokens = new ArrayList<>();
+  // Offset of the first character of the lexeme being scanned.
+  private int start = 0;
+  // Offset of the next character to be consumed.
+  private int current = 0;
+  // Source line of the current character, used for tokens and error reports.
+  private int line = 1;
+  // Reserved words and their token types; any other identifier is IDENTIFIER.
+  private static final Map<String, TokenType> keywords;
+
+  static {
+    keywords = new HashMap<>();
+    keywords.put("and", AND);
+    keywords.put("class", CLASS);
+    keywords.put("else", ELSE);
+    keywords.put("false", FALSE);
+    keywords.put("for", FOR);
+    keywords.put("fun", FUN);
+    keywords.put("if", IF);
+    keywords.put("nil", NIL);
+    keywords.put("or", OR);
+    keywords.put("print", PRINT);
+    keywords.put("return", RETURN);
+    keywords.put("super", SUPER);
+    keywords.put("this", THIS);
+    keywords.put("true", TRUE);
+    keywords.put("var", VAR);
+    keywords.put("while", WHILE);
+  }
+
+  Scanner(String source) {
+    this.source = source;
+  }
+
+  /** Scans the whole source and returns the tokens, terminated by an EOF token. */
+  List<Token> scanTokens() {
+    while (!isAtEnd()) {
+      // We are at the beginning of the next lexeme.
+      start = current;
+      scanToken();
+    }
+
+    tokens.add(new Token(EOF, "", null, line));
+    return tokens;
+  }
+
+  /** Scans one lexeme starting at {@code start}, adding a token if it forms one. */
+  private void scanToken() {
+    char c = advance();
+
+    switch (c) {
+      case '(': addToken(LEFT_PAREN); break;
+      case ')': addToken(RIGHT_PAREN); break;
+      case '{': addToken(LEFT_BRACE); break;
+      case '}': addToken(RIGHT_BRACE); break;
+      case ',': addToken(COMMA); break;
+      case '.': addToken(DOT); break;
+      case '-': addToken(MINUS); break;
+      case '+': addToken(PLUS); break;
+      case ';': addToken(SEMICOLON); break;
+      case '*': addToken(STAR); break;
+      case '!':
+        addToken(match('=') ? BANG_EQUAL : BANG);
+        break;
+      case '=':
+        addToken(match('=') ? EQUAL_EQUAL : EQUAL);
+        break;
+      case '<':
+        addToken(match('=') ? LESS_EQUAL : LESS);
+        break;
+      case '>':
+        addToken(match('=') ? GREATER_EQUAL : GREATER);
+        break;
+      case '/':
+        if (match('/')) {
+          // A comment goes until the end of the line.
+          while (peek() != '\n' && !isAtEnd()) advance();
+        } else {
+          addToken(SLASH);
+        }
+        break;
+
+      case ' ':
+      case '\r':
+      case '\t':
+        // Ignore whitespace.
+        break;
+
+      // I guess this code isn't meant to run on Mac OS 9 and before.
+      case '\n':
+        line++;
+        break;
+
+      case '"': string(); break;
+
+      default:
+        if (isDigit(c)) {
+          number();
+        } else if (isAlpha(c)) {
+          identifier();
+        } else {
+          Lox.error(line, "Unexpected character.");
+        }
+        break;
+    }
+  }
+
+  /** Scans an identifier or keyword whose first character is already consumed. */
+  private void identifier() {
+    while (isAlphaNumeric(peek())) advance();
+
+    String text = source.substring(start, current);
+    TokenType type = keywords.get(text);
+    if (type == null) type = IDENTIFIER;
+    addToken(type);
+  }
+
+  /** Scans a number literal: digits with an optional fractional part. */
+  private void number() {
+    while (isDigit(peek())) advance();
+
+    // Look for a fractional part.
+    if (peek() == '.' && isDigit(peekNext())) {
+      // Consume the "."
+      advance();
+
+      while (isDigit(peek())) advance();
+    }
+
+    addToken(NUMBER,
+        Double.parseDouble(source.substring(start, current)));
+  }
+
+  // I guess we won't be able to include escaped characters in the string? At
+  // least not escaped " characters.
+  /** Scans a string literal up to the closing quote; newlines are allowed. */
+  private void string() {
+    while (peek() != '"' && !isAtEnd()) {
+      if (peek() == '\n') line++;
+      advance();
+    }
+
+    if (isAtEnd()) {
+      Lox.error(line, "Unterminated string.");
+      return;
+    }
+
+    // The closing ".
+    advance();
+
+    // Trim the surrounding quotes.
+    String value = source.substring(start + 1, current - 1);
+    addToken(STRING, value);
+  }
+
+  /** Consumes the next character only if it equals {@code expected}. */
+  private boolean match(char expected) {
+    if (isAtEnd()) return false;
+    if (source.charAt(current) != expected) return false;
+
+    current++;
+    return true;
+  }
+
+  /** Returns the next character without consuming it, or NUL at end of input. */
+  private char peek() {
+    if (isAtEnd()) return '\0';
+    return source.charAt(current);
+  }
+
+  /** Returns the character after the next one, or NUL if there is none. */
+  private char peekNext() {
+    if (current + 1 >= source.length()) return '\0';
+    return source.charAt(current + 1);
+  }
+
+  private boolean isAlpha(char c) {
+    return (c >= 'a' && c <= 'z') ||
+        (c >= 'A' && c <= 'Z') ||
+        c == '_';
+  }
+
+  private boolean isAlphaNumeric(char c) {
+    return isAlpha(c) || isDigit(c);
+  }
+
+  private boolean isDigit(char c) {
+    return c >= '0' && c <= '9';
+  }
+
+  private boolean isAtEnd() {
+    return current >= source.length();
+  }
+
+  /** Consumes the next character and returns it. */
+  private char advance() {
+    current++;
+    return source.charAt(current - 1);
+  }
+
+  private void addToken(TokenType type) {
+    addToken(type, null);
+  }
+
+  /** Adds a token for the current lexeme with the given literal value. */
+  private void addToken(TokenType type, Object literal) {
+    String text = source.substring(start, current);
+    tokens.add(new Token(type, text, literal, line));
+  }
+}
diff --git a/src/com/craftinginterpreters/lox/Token.java b/src/com/craftinginterpreters/lox/Token.java
new file mode 100644
index 0000000..84762a4
--- /dev/null
+++ b/src/com/craftinginterpreters/lox/Token.java
@@ -0,0 +1,21 @@
+package com.craftinginterpreters.lox;
+
+/** A single lexeme together with its type, literal value, and source line. */
+class Token {
+  final TokenType type;
+  final String lexeme;
+  final Object literal;
+  final int line;
+
+  Token(TokenType type, String lexeme, Object literal, int line) {
+    this.type = type;
+    this.lexeme = lexeme;
+    this.literal = literal;
+    this.line = line;
+  }
+
+  @Override
+  public String toString() {
+    return type + " " + lexeme + " " + literal;
+  }
+}
diff --git a/src/com/craftinginterpreters/lox/TokenType.java b/src/com/craftinginterpreters/lox/TokenType.java
new file mode 100644
index 0000000..9ab2a8b
--- /dev/null
+++ b/src/com/craftinginterpreters/lox/TokenType.java
@@ -0,0 +1,21 @@
+package com.craftinginterpreters.lox;
+
+/** The kinds of token the scanner can produce. */
+enum TokenType {
+  // Single-character tokens.
+  LEFT_PAREN, RIGHT_PAREN, LEFT_BRACE, RIGHT_BRACE, COMMA, DOT, MINUS, PLUS,
+  SEMICOLON, SLASH, STAR,
+
+  // One or two character tokens.
+  BANG, BANG_EQUAL, EQUAL, EQUAL_EQUAL, GREATER, GREATER_EQUAL, LESS,
+  LESS_EQUAL,
+
+  // Literals.
+  IDENTIFIER, STRING, NUMBER,
+
+  // Keywords.
+  AND, CLASS, ELSE, FALSE, FUN, FOR, IF, NIL, OR, PRINT, RETURN, SUPER, THIS,
+  TRUE, VAR, WHILE,
+
+  EOF
+}