summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbd <bdunahu@operationnull.com>2025-01-28 14:39:47 -0500
committerbd <bdunahu@operationnull.com>2025-01-28 14:39:47 -0500
commit9e09767e23a4edb6b31540195bfe885f83e080d7 (patch)
tree42454c51ea8e0c8cf90b7c9020dedf3a5627cea2
parentc63a873fe7fbf7947e07acfaf2402fe85100deba (diff)
[Ongoing] Rewrite frontend to use Flex/Bison
This is a merge of another experiment, so the changes are large: - separated "modules" directory into frontend/backend - adjusted module names and moved files for this to happen - removed modules lexer & parser - removed all the unit tests (most were outdated) - added Bison, flex, and C development tools to manifest.scm - added lexer.l, a source file used by the flex utility with a functioning lexing implementation - added parser.y, a source file used by the bison utility with a functioning parser implementation - added node.c and node.h, which parser.y uses to construct an AST of a C source file (up to binary ops) - added driver.c, a Guile-C interface that provides a module to scheme programs - added a Makefile to make all of this - added stuff to .gitignore
-rw-r--r--.dir-locals.el3
-rw-r--r--.gitignore10
-rw-r--r--Makefile34
-rw-r--r--manifest.scm6
-rw-r--r--src/backend/ast/ir.scm (renamed from src/modules/ast/ir.scm)2
-rw-r--r--src/backend/emitter/driver.scm6
-rw-r--r--src/backend/emitter/traverse.scm (renamed from src/modules/emitter/traverse.scm)2
-rw-r--r--src/backend/generator/allocate.scm (renamed from src/modules/generator/allocate.scm)4
-rw-r--r--src/backend/generator/assembly.scm (renamed from src/modules/generator/assembly.scm)8
-rw-r--r--src/backend/generator/driver.scm9
-rw-r--r--src/backend/generator/expansion.scm (renamed from src/modules/generator/expansion.scm)6
-rw-r--r--src/backend/tacky/driver.scm6
-rw-r--r--src/backend/tacky/traverse.scm (renamed from src/modules/tacky/traverse.scm)10
-rw-r--r--src/backend/utils/assign-stack.scm (renamed from src/modules/utils/assign-stack.scm)2
-rw-r--r--src/backend/utils/merge-instructions.scm (renamed from src/modules/utils/merge-instructions.scm)2
-rw-r--r--src/frontend/driver.c88
-rw-r--r--src/frontend/driver.scm3
-rw-r--r--src/frontend/lexer.l47
-rw-r--r--src/frontend/node.c84
-rw-r--r--src/frontend/node.h33
-rw-r--r--src/frontend/parser.y116
-rw-r--r--src/modules/emitter/driver.scm6
-rw-r--r--src/modules/generator/driver.scm9
-rw-r--r--src/modules/lexer/driver.scm83
-rw-r--r--src/modules/parser/driver.scm35
-rw-r--r--src/modules/tacky/driver.scm6
-rwxr-xr-xsrc/ull107
-rw-r--r--src/unit-tests/lexer/lexer.test.scm89
-rw-r--r--src/unit-tests/parser/parser.test.scm30
-rw-r--r--src/unit-tests/utils/t-factory.test.scm19
-rwxr-xr-xull127
31 files changed, 584 insertions, 408 deletions
diff --git a/.dir-locals.el b/.dir-locals.el
index ba479e9..cf9d341 100644
--- a/.dir-locals.el
+++ b/.dir-locals.el
@@ -1,4 +1,5 @@
;;; Directory Local Variables -*- no-byte-compile: t -*-
;;; For more information see (info "(emacs) Directory Variables")
-((scheme-mode . ((compile-command . "guix shell -m manifest.scm -- ./writing-a-c-compiler-tests/test_compiler ./src/ull --chapter 1 --stage lex"))))
+((scheme-mode . ((compile-command . "guix shell -m manifest.scm -- ./writing-a-c-compiler-tests/test_compiler ./ull --chapter 1 --stage lex")))
+ (c-mode . ((compile-command . "guix shell -m manifest.scm -- make clean all"))))
diff --git a/.gitignore b/.gitignore
index aff9071..c2b23bf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,8 @@
-/writing-a-c-compiler-tests/
-/*.log
+writing-a-c-compiler-tests/
+build/
+src/frontend/lexer.c
+src/frontend/lexer.h
+src/frontend/parser.c
+src/frontend/parser.h
+src/frontend/parser.output
+libull.so \ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..de737ad
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,34 @@
+CC = gcc
+CFLAGS = -fPIC -Wall -Wextra -Wpedantic `pkg-config --cflags guile-3.0`
+LIBS = `pkg-config --libs guile-3.0`
+
+TARGET ?= libull.so
+
+SRCDIR := src/frontend
+OBJDIR := build
+
+SRCS = $(SRCDIR)/lexer.c $(SRCDIR)/parser.c $(SRCDIR)/node.c $(SRCDIR)/driver.c
+OBJS = $(OBJDIR)/driver.o $(OBJDIR)/lexer.o $(OBJDIR)/parser.o $(OBJDIR)/node.o
+
+all: $(TARGET)
+
+$(TARGET): $(OBJS)
+ $(CC) -shared -o $@ $^ $(LIBS)
+
+$(OBJS): $(OBJDIR)/%.o : $(SRCDIR)/%.c $(SRCDIR)/lexer.h $(SRCDIR)/parser.h | $(OBJDIR)
+ $(CC) $(CFLAGS) -c $< -o $@
+
+$(SRCDIR)/lexer.h $(SRCDIR)/lexer.c: $(SRCDIR)/lexer.l
+ flex --header-file=$(SRCDIR)/lexer.h -o $(SRCDIR)/lexer.c $(SRCDIR)/lexer.l
+
+$(SRCDIR)/parser.h $(SRCDIR)/parser.c: $(SRCDIR)/parser.y
+ bison -d -v -o $(SRCDIR)/parser.c $(SRCDIR)/parser.y
+
+$(OBJDIR):
+ mkdir -p $(OBJDIR)
+
+clean:
+ rm -f $(OBJS) $(SRCDIR)/lexer.c $(SRCDIR)/lexer.h $(SRCDIR)/parser.c $(SRCDIR)/parser.h $(SRCDIR)/parser.output $(TARGET)
+
+
+.PHONY: all clean
diff --git a/manifest.scm b/manifest.scm
index 54ab185..d028b9c 100644
--- a/manifest.scm
+++ b/manifest.scm
@@ -2,17 +2,21 @@
(gnu packages flex)
(gnu packages commencement)
(gnu packages base)
+ (gnu packages gdb)
(gnu packages python)
(gnu packages guile)
- (gnu packages pkg-config))
+ (gnu packages pkg-config)
+ (gnu packages valgrind))
(packages->manifest
(list
bison
flex
gcc-toolchain
+ gdb
gnu-make
guile-next
pkg-config
python
+ valgrind
))
diff --git a/src/modules/ast/ir.scm b/src/backend/ast/ir.scm
index 50e9e29..102739c 100644
--- a/src/modules/ast/ir.scm
+++ b/src/backend/ast/ir.scm
@@ -1,4 +1,4 @@
-(define-module (ast ir)
+(define-module (backend ast ir)
#:export (prog
srout
neg
diff --git a/src/backend/emitter/driver.scm b/src/backend/emitter/driver.scm
new file mode 100644
index 0000000..bd65d5a
--- /dev/null
+++ b/src/backend/emitter/driver.scm
@@ -0,0 +1,6 @@
+(define-module (backend emitter driver)
+ #:export (assembly->string))
+
+
+(define (assembly->string n)
+ (eval n (resolve-module '(backend emitter traverse))))
diff --git a/src/modules/emitter/traverse.scm b/src/backend/emitter/traverse.scm
index 6c0c19d..0a5b4cc 100644
--- a/src/modules/emitter/traverse.scm
+++ b/src/backend/emitter/traverse.scm
@@ -1,4 +1,4 @@
-(define-module (emitter traverse)
+(define-module (backend emitter traverse)
#:export (prog
subrout
instr
diff --git a/src/modules/generator/allocate.scm b/src/backend/generator/allocate.scm
index f975fca..0d417db 100644
--- a/src/modules/generator/allocate.scm
+++ b/src/backend/generator/allocate.scm
@@ -1,5 +1,5 @@
-(define-module (generator allocate)
- #:use-module (utils assign-stack)
+(define-module (backend generator allocate)
+ #:use-module (backend utils assign-stack)
#:export (expansion->allocate))
diff --git a/src/modules/generator/assembly.scm b/src/backend/generator/assembly.scm
index 55eaaed..8a110f3 100644
--- a/src/modules/generator/assembly.scm
+++ b/src/backend/generator/assembly.scm
@@ -1,7 +1,7 @@
-(define-module (generator assembly)
- #:use-module (ast ir)
- #:use-module (utils assign-stack)
- #:use-module (utils merge-instructions)
+(define-module (backend generator assembly)
+ #:use-module (backend ast ir)
+ #:use-module (backend utils assign-stack)
+ #:use-module (backend utils merge-instructions)
#:export (instrs
mov))
diff --git a/src/backend/generator/driver.scm b/src/backend/generator/driver.scm
new file mode 100644
index 0000000..6625388
--- /dev/null
+++ b/src/backend/generator/driver.scm
@@ -0,0 +1,9 @@
+(define-module (backend generator driver)
+ #:use-module (backend generator allocate)
+ #:export (tacky->assembly))
+
+
+(define (tacky->assembly n)
+ (eval (expansion->allocate
+ (eval n (resolve-module '(backend generator expansion))))
+ (resolve-module '(backend generator assembly))))
diff --git a/src/modules/generator/expansion.scm b/src/backend/generator/expansion.scm
index 5bfa878..9fe2e0a 100644
--- a/src/modules/generator/expansion.scm
+++ b/src/backend/generator/expansion.scm
@@ -1,6 +1,6 @@
-(define-module (generator expansion)
- #:use-module (ast ir)
- #:use-module (utils merge-instructions)
+(define-module (backend generator expansion)
+ #:use-module (backend ast ir)
+ #:use-module (backend utils merge-instructions)
#:export (instrs
not
neg
diff --git a/src/backend/tacky/driver.scm b/src/backend/tacky/driver.scm
new file mode 100644
index 0000000..b9585c7
--- /dev/null
+++ b/src/backend/tacky/driver.scm
@@ -0,0 +1,6 @@
+(define-module (backend tacky driver)
+ #:export (ast->tacky))
+
+
+(define (ast->tacky n)
+ (eval n (resolve-module '(backend tacky traverse))))
diff --git a/src/modules/tacky/traverse.scm b/src/backend/tacky/traverse.scm
index 2fd4ca7..9a1b7aa 100644
--- a/src/modules/tacky/traverse.scm
+++ b/src/backend/tacky/traverse.scm
@@ -1,15 +1,11 @@
-(define-module (tacky traverse)
- #:use-module (ast ir)
- #:export (prog
- func
+(define-module (backend tacky traverse)
+ #:use-module (backend ast ir)
+ #:export (func
stmt
unary
const))
-(define (prog srout)
- (list 'prog srout))
-
(define (func name instrs)
(list 'srout name instrs))
diff --git a/src/modules/utils/assign-stack.scm b/src/backend/utils/assign-stack.scm
index 1096846..dff9fc7 100644
--- a/src/modules/utils/assign-stack.scm
+++ b/src/backend/utils/assign-stack.scm
@@ -1,4 +1,4 @@
-(define-module (utils assign-stack)
+(define-module (backend utils assign-stack)
#:export (make-location
get-frame-size))
diff --git a/src/modules/utils/merge-instructions.scm b/src/backend/utils/merge-instructions.scm
index 754117c..f9fc043 100644
--- a/src/modules/utils/merge-instructions.scm
+++ b/src/backend/utils/merge-instructions.scm
@@ -1,4 +1,4 @@
-(define-module (utils merge-instructions)
+(define-module (backend utils merge-instructions)
#:use-module (srfi srfi-1)
#:export (merge-instr))
diff --git a/src/frontend/driver.c b/src/frontend/driver.c
new file mode 100644
index 0000000..52eb3f4
--- /dev/null
+++ b/src/frontend/driver.c
@@ -0,0 +1,88 @@
+#include "lexer.h"
+#include "parser.h"
+#include "node.h"
+#include <stdio.h>
+#include <libguile.h>
+
+/**
+ * @param node a tree of node structs representing a C source file.
+ * @return a scheme representation of node.
+ */
+SCM
+node_to_scm(Node *node) {
+ SCM ret = scm_list_1(scm_from_locale_symbol(node_types[node->type]));
+
+ SCM field_value = SCM_BOOL_F;
+ switch (node->type) {
+ case FUNC:
+ field_value = scm_from_locale_string(node->field.name);
+ break;
+ case EXPR:
+ field_value = scm_from_locale_symbol(node_ops[node->field.op]);
+ break;
+ case CONST:
+ field_value = scm_from_int32(node->field.val);
+ break;
+ default:
+ ;
+ }
+
+ if (scm_is_true(field_value))
+ ret = scm_append(scm_list_2(ret, scm_list_1(field_value)));
+
+ SCM child;
+ for (size_t i = 0; i < node->num_children; ++i) {
+ child = node_to_scm(node->children[i]);
+ ret = scm_append(scm_list_2(ret, scm_list_1(child)));
+ }
+
+ return ret;
+}
+
+/** \brief Parser driver for ull.
+ * Given F, returns an AST of the program represented as a guile s-exp, or #f on a parse error.
+ * if DO_PARSE is false, only performs lexing, returning #t on a success.
+ * @param f a preprocessed C file to parse.
+ * @param do_parse on false, do not perform the parsing stage.
+ * @return a SCM list representing the parse tree, or #t if do_parse is #f and lexing is successful, or #f otherwise.
+ */
+SCM
+file_to_ast_wrapper(SCM f, SCM do_parse)
+{
+ char *file = scm_to_locale_string(f);
+ Node *root = NULL;
+ SCM ret = SCM_BOOL_F;
+
+ yyin = fopen(file, "r");
+ if (yyin != NULL){
+ if (scm_is_true(do_parse)) {
+ if (yyparse(&root) == 0) {
+ ret = node_to_scm(root);
+ free_node(root);
+ }
+ } else {
+ ret = SCM_BOOL_T;
+ int token;
+ while ((token = yylex()) != 0) {
+ if (token == YYerror)
+ ret = SCM_BOOL_F;
+ }
+ }
+ }
+
+ fclose(yyin);
+ free(file);
+ return ret;
+}
+
+void
+init_parser_driver()
+{
+ scm_c_define_gsubr("file->ast", 2, 0, 0, file_to_ast_wrapper);
+ scm_c_export("file->ast", NULL);
+}
+
+void
+scm_init_parser_driver_module() {
+ scm_c_define_module("frontend driver", init_parser_driver, NULL);
+}
diff --git a/src/frontend/driver.scm b/src/frontend/driver.scm
new file mode 100644
index 0000000..402e6e9
--- /dev/null
+++ b/src/frontend/driver.scm
@@ -0,0 +1,3 @@
+(define-module (frontend driver))
+
+(load-extension "./libull.so" "scm_init_parser_driver_module")
diff --git a/src/frontend/lexer.l b/src/frontend/lexer.l
new file mode 100644
index 0000000..5b72086
--- /dev/null
+++ b/src/frontend/lexer.l
@@ -0,0 +1,47 @@
+/*** definition section ***/
+%{
+ /* C to be copied verbatim */
+ #include "parser.h"
+%}
+
+/* read only one input file */
+%option noyywrap
+%option yylineno
+
+DIGIT [0-9]
+ALPHA [[:alpha:]_]
+
+/*** rules section ***/
+%%
+
+"(" {return L_PAREN;}
+")" {return R_PAREN;}
+"{" {return L_BRACK;}
+"}" {return R_BRACK;}
+";" {return SEMI_COL;}
+"~" {return COMP;}
+"*" {return MULT;}
+"/" {return DIV;}
+"%" {return MOD;}
+"-" {return MINUS;}
+"+" {return PLUS;}
+
+"int" {return INT;}
+"void" {return VOID;}
+"return" {return RET;}
+
+{DIGIT}+ {yylval.ival = atol(yytext); return NUMBER;}
+{ALPHA}+ {
+ yylval.sval = strdup(yytext);
+ return WORD;
+ }
+
+[[:space:]]+ {/* discard */}
+. {
+ printf("Error at line %d: unrecognized symbol \"%s\"\n", yylineno, yytext);
+ return YYerror;
+}
+
+%%
+
+/*** C section ***/
diff --git a/src/frontend/node.c b/src/frontend/node.c
new file mode 100644
index 0000000..344b48a
--- /dev/null
+++ b/src/frontend/node.c
@@ -0,0 +1,84 @@
+#include "node.h"
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+const char* node_types[] = {"prog", "func", "stmt", "expr", "const"};
+const char* node_ops[] = {"not", "neg", "plus", "minus", "mult", "div", "mod"};
+
+Node *create_node(enum node_type type) {
+ Node *node = malloc(sizeof(Node));
+ node->type = type;
+ node->children = NULL;
+ node->num_children = 0;
+ return node;
+}
+
+Node *create_function(char *name) {
+ Node *node = malloc(sizeof(Node));
+ node->type = FUNC;
+ node->field.name = name;
+ node->children = NULL;
+ node->num_children = 0;
+ return node;
+}
+
+Node *create_expr(enum node_op op) {
+ Node *node = malloc(sizeof(Node));
+ node->type = EXPR;
+ node->field.op = op;
+ node->children = NULL;
+ node->num_children = 0;
+ return node;
+}
+
+Node *create_const(int val) {
+ Node *node = malloc(sizeof(Node));
+ node->type = CONST;
+ node->field.val = val;
+ node->children = NULL;
+ node->num_children = 0;
+ return node;
+}
+
+void add_child(Node *parent, Node *child) {
+ size_t new_size = sizeof(Node*) * (parent->num_children + 1);
+ parent->children = realloc(parent->children, new_size);
+
+ parent->children[parent->num_children] = child;
+ parent->num_children++;
+}
+
+void free_node(Node *node) {
+ for (size_t i = 0; i < node->num_children; ++i) {
+ free_node(node->children[i]);
+ }
+ if (node->type == FUNC)
+ free(node->field.name);
+
+ free(node->children);
+ free(node);
+}
+
+void *print_node(Node *node, int indent) {
+ if (node == NULL) {
+ return;
+ }
+
+ for (int i = 0; i < indent; ++i)
+ printf(" ");
+
+ printf("type: %s", node_types[node->type]);
+ if (node->type == FUNC)
+ printf(", name: \"%s\"", node->field.name);
+ if (node->type == EXPR)
+ printf(", op: %s", node_ops[node->field.op]);
+ if (node->type == CONST)
+ printf(", val: %d", node->field.val);
+ printf("\n");
+
+ for (size_t i = 0; i < node->num_children; ++i) {
+ print_node(node->children[i], indent + 1);
+ }
+}
diff --git a/src/frontend/node.h b/src/frontend/node.h
new file mode 100644
index 0000000..71c8182
--- /dev/null
+++ b/src/frontend/node.h
@@ -0,0 +1,33 @@
+#ifndef NODE_H
+#define NODE_H
+
+#include <stddef.h>
+
+enum node_type{PROG, FUNC, STMT, EXPR, CONST};
+enum node_op{COMP_SYM, NEG_SYM, PLUS_SYM, MINUS_SYM, MULT_SYM, DIV_SYM, MOD_SYM};
+
+extern const char* node_types[];
+extern const char* node_ops[];
+
+typedef union {
+ char *name;
+ int val;
+ enum node_op op;
+} node_field;
+
+typedef struct Node {
+ enum node_type type;
+ node_field field;
+ struct Node **children;
+ size_t num_children;
+} Node;
+
+Node *create_node(enum node_type type);
+Node *create_function(char *name);
+Node *create_expr(enum node_op op);
+Node *create_const(int val);
+void add_child(Node *parent, Node *child);
+void free_node(Node *node);
+void *print_node(Node *node, int indent);
+
+#endif
diff --git a/src/frontend/parser.y b/src/frontend/parser.y
new file mode 100644
index 0000000..aa58f64
--- /dev/null
+++ b/src/frontend/parser.y
@@ -0,0 +1,116 @@
+/*** definition section ***/
+%{
+ /* C to be copied verbatim */
+ #include "lexer.h"
+ #include "node.h"
+ void yyerror(Node **root, const char *msg);
+%}
+
+%code requires {
+ #include "node.h"
+}
+
+%union {
+ Node *node;
+ int ival;
+ char *sval;
+};
+
+%locations
+%start input
+%parse-param {Node **root}
+
+%token L_PAREN R_PAREN L_BRACK R_BRACK SEMI_COL COMP MULT DIV MOD MINUS PLUS INT VOID RET
+%token <sval> WORD
+%token <ival> NUMBER
+
+%type <node> input
+%type <node> func
+%type <node> stmt
+%type <node> exp
+%type <node> term
+%type <node> factor
+%type <ival> un_op
+
+
+/*** rules section ***/
+%%
+input: func {
+ *root = create_node(PROG);
+ add_child(*root, $1);
+ }
+;
+
+func: INT WORD L_PAREN VOID R_PAREN L_BRACK stmt R_BRACK {
+ $$ = create_function($2);
+ add_child($$, $7);
+ }
+;
+
+stmt: RET exp SEMI_COL {
+ $$ = create_node(STMT);
+ add_child($$, $2);
+ }
+;
+
+exp: term {
+ $$ = $1;
+ }
+| exp PLUS term {
+ $$ = create_expr(PLUS_SYM);
+ add_child($$, $1);
+ add_child($$, $3);
+ }
+| exp MINUS term {
+ $$ = create_expr(MINUS_SYM);
+ add_child($$, $1);
+ add_child($$, $3);
+ }
+| un_op exp {
+ $$ = create_expr($1);
+ add_child($$, $2);
+ }
+;
+
+term: factor {
+ $$ = $1;
+ }
+| term MULT factor {
+ $$ = create_expr(MULT_SYM);
+ add_child($$, $1);
+ add_child($$, $3);
+ }
+| term DIV factor {
+ $$ = create_expr(DIV_SYM);
+ add_child($$, $1);
+ add_child($$, $3);
+ }
+| term MOD factor {
+ $$ = create_expr(MOD_SYM);
+ add_child($$, $1);
+ add_child($$, $3);
+ }
+;
+
+factor: NUMBER {
+ $$ = create_const($1);
+ }
+| L_PAREN exp R_PAREN {
+ $$ = $2;
+ }
+;
+
+un_op: COMP {
+ $$ = COMP_SYM;
+ }
+| MINUS {
+ $$ = NEG_SYM;
+ }
+;
+
+%%
+
+void yyerror(Node **root, const char *msg) {
+ printf("** Line %d: %s\n", yylineno, msg);
+}
+
diff --git a/src/modules/emitter/driver.scm b/src/modules/emitter/driver.scm
deleted file mode 100644
index 7ce8517..0000000
--- a/src/modules/emitter/driver.scm
+++ /dev/null
@@ -1,6 +0,0 @@
-(define-module (emitter driver)
- #:export (assembly->string))
-
-
-(define (assembly->string n)
- (eval n (resolve-module '(emitter traverse))))
diff --git a/src/modules/generator/driver.scm b/src/modules/generator/driver.scm
deleted file mode 100644
index 8b9ed30..0000000
--- a/src/modules/generator/driver.scm
+++ /dev/null
@@ -1,9 +0,0 @@
-(define-module (generator driver)
- #:use-module (generator allocate)
- #:export (tacky->assembly))
-
-
-(define (tacky->assembly n)
- (eval (expansion->allocate
- (eval n (resolve-module '(generator expansion))))
- (resolve-module '(generator assembly))))
diff --git a/src/modules/lexer/driver.scm b/src/modules/lexer/driver.scm
deleted file mode 100644
index 92c8687..0000000
--- a/src/modules/lexer/driver.scm
+++ /dev/null
@@ -1,83 +0,0 @@
-(define-module (lexer driver)
- #:export (read-tokens))
-
-(define (read-tokens)
- "Returns a stream of tokens from the
-current input port."
- (define (read-tokens-loop tokens-so-far)
- (let ((token (read-token)))
- (if token
- (read-tokens-loop (cons token tokens-so-far))
- (reverse tokens-so-far))))
- (read-tokens-loop '()))
-
-(define (read-token)
- (let ((chr (read-char)))
- (cond
- ((eof-object? chr) #f)
- ((char-whitespace? chr)
- (read-token))
-
- ((eqv? chr #\() 'left-paren)
- ((eqv? chr #\)) 'right-paren)
- ((eqv? chr #\{) 'open-brace)
- ((eqv? chr #\}) 'close-brace)
- ((eqv? chr #\;) 'semi-colon)
- ((eqv? chr #\~) 'not)
- ((eqv? chr #\*) 'mul)
- ((eqv? chr #\/) 'div)
- ((eqv? chr #\%) 'mod)
-
- ((eqv? chr #\+)
- (if (take-double? chr)
- '++
- 'add))
-
- ((eqv? chr #\-)
- (if (take-double? chr)
- '--
- 'neg))
-
- ((char-numeric? chr)
- (read-constant chr))
-
- ((char-alphabetic? chr)
- (lookup-keyword (read-identifier chr)))
-
- (#t (error "illegal lexical syntax")))))
-
-(define (read-constant chr)
- (define (read-constant-helper chrs-so-far)
- (let ((chr (peek-char)))
- (cond ((and (not (eof-object? chr)) (char-numeric? chr))
- (read-constant-helper (cons (read-char) chrs-so-far)))
- ((and (not (eof-object? chr)) (char-alphabetic? chr))
- (error "identifier starting with digit"))
- (#t (reverse chrs-so-far)))))
- (string->number (list->string (read-constant-helper (list chr)))))
-
-(define (read-identifier chr)
- (define (read-identifier-helper chrs-so-far)
- (let ((chr (peek-char)))
- (cond ((and (not (eof-object? chr))
- (or (char-alphabetic? chr)
- (char-numeric? chr)
- (eqv? chr #\_)))
- (read-identifier-helper (cons (read-char) chrs-so-far)))
- (#t (reverse chrs-so-far)))))
- (list->string (read-identifier-helper (list chr))))
-
-(define (take-double? chr)
- (if (eqv? chr (peek-char))
- (read-char)
- #f))
-
-(define (lookup-keyword id)
- "Given identifier ID, converts it to a keyword
-if one is known."
- (let ((found (assoc
- id
- '(("int" . int)
- ("void" . void)
- ("return" . return)))))
- (if found (cdr found) id)))
diff --git a/src/modules/parser/driver.scm b/src/modules/parser/driver.scm
deleted file mode 100644
index e464a7a..0000000
--- a/src/modules/parser/driver.scm
+++ /dev/null
@@ -1,35 +0,0 @@
-(define-module (parser driver)
- #:use-module (ice-9 match)
- #:export (tokens->ast))
-
-
-(define (die)
- (error "syntax error"))
-
-(define (tokens->ast tokens)
- (match tokens
- ((func ...)
- (list 'prog (function func)))
- (_ (die))))
-
-(define (function tokens)
- (match tokens
- (('int (? string? id) 'left-paren 'void 'right-paren 'open-brace stmt ... 'close-brace)
- (list 'func id (statement stmt)))
- (_ (die))))
-
-(define (statement tokens)
- (match tokens
- (('return expr ... 'semi-colon)
- (list 'stmt (expression expr)))
- (_ (die))))
-
-(define (expression tokens)
- (match tokens
- (((? integer? int))
- (list 'const int))
- (((or 'neg 'not) expr ...)
- (list 'expr (car tokens) (expression expr)))
- (('left-paren expr ... 'right-paren)
- (expression expr))
- (_ (die))))
diff --git a/src/modules/tacky/driver.scm b/src/modules/tacky/driver.scm
deleted file mode 100644
index 9696306..0000000
--- a/src/modules/tacky/driver.scm
+++ /dev/null
@@ -1,6 +0,0 @@
-(define-module (tacky driver)
- #:export (ast->tacky))
-
-
-(define (ast->tacky n)
- (eval n (resolve-module '(tacky traverse))))
diff --git a/src/ull b/src/ull
deleted file mode 100755
index f878c4e..0000000
--- a/src/ull
+++ /dev/null
@@ -1,107 +0,0 @@
-#!/run/current-system/profile/bin/guile \
--L ./src/modules -e main -s
-!#
-
-
-(use-modules (ice-9 getopt-long)
- (ice-9 popen)
- (ice-9 pretty-print)
- (lexer driver)
- (parser driver)
- (tacky driver)
- (generator driver)
- (emitter driver))
-
-
-(define version "v0.1.1")
-
-
-(define (error message)
- (display (string-concatenate `(,message "
-Usage:
- ull [OPTIONS] file
-Options:
- --version, -v: print version information
- --debug, -d: turn on verbose output
- --lex, -l: run the lexer, but stop before assembly generation
- --parse, -p: run the lexer and parser, but stop before assembly generation
- --tacky, -t: run the tacky generation stage, but stop before assembly generation
- --codegen, -c: perform lexing, parsing, tacky, and assembly generation, but stop before code emission\n")))
- (exit #f))
-
-(define (c-extension? file)
- (let ((extension (string-drop file (- (string-length file) 2))))
- (string=? extension ".c")))
-
-(define (process file parse? tack? generate? write? dest)
- "Driver for lexing, parsing, and assembly generation."
- (let* ((port (preprocess file))
- (tokens (begin (set-current-input-port port)
- (read-tokens))))
- (close-input-port port)
- (when parse?
- (let ((c-ast (tokens->ast tokens)))
- (pretty-print c-ast)
- (when tack?
- (let ((tacky-ast (ast->tacky c-ast)))
- (pretty-print tacky-ast)
- (when generate?
- (let ((assembly-ast (tacky->assembly tacky-ast)))
- (pretty-print assembly-ast)
- (when write?
- (let ((program (assembly->string assembly-ast))
- (a-file (string-append dest ".s")))
- (when (file-exists? a-file)
- (delete-file a-file))
- (let ((port (open-output-file a-file)))
- (display program port)
- (close-port port))
- (when (postprocess a-file dest)
- (display (string-concatenate `("Postprocess reported success (wrote " ,dest ").\n"))))
- ))))))))))
-
-(define (preprocess file)
- "Returns an input port containing FILE processed with gcc."
- (open-input-pipe (string-append "gcc -E -P " file)))
-
-(define (postprocess src dest)
- "Assembles and links SRC, producing executable DEST.
-Returns #f on a failure, #t on a success."
- (zero? (system (string-concatenate `("gcc " ,src " -o " ,dest)))))
-
-(define (main args)
- "Entry point for ull. Handles user args and performs initial validity check."
- (let* ((option-spec
- '((version (single-char #\v) (value #f))
- (debug (single-char #\d) (value #f))
- (lex (single-char #\l) (value #f))
- (parse (single-char #\p) (value #f))
- (tacky (single-char #\t) (value #f))
- (codegen (single-char #\c) (value #f))))
- (options (getopt-long args option-spec))
- (rest (option-ref options '() #f))
- (file (if (null? rest) #f (car rest))))
- (cond
- ((option-ref options 'version #f)
- (display (string-concatenate `("ull (" ,version ")\n"))))
- ((not (equal? 1 (length rest))) (error "Wrong number of arguments."))
- ((or (not file)
- (not (access? file R_OK))
- (not (equal? 'regular (stat:type (stat file))))
- (not (c-extension? file))) (error "The file could not be read, or it is not a C source code file."))
- (#t
- (let ((parse? (not (option-ref options 'lex #f)))
- (tack? (not (option-ref options 'parse #f)))
- (generate? (not (option-ref options 'tacky #f)))
- (write? (not (option-ref options 'codegen #f))))
- (process file
- parse?
- tack?
- generate?
- write?
- (string-drop-right file 2)))))))
-
-
-;; Local Variables:
-;; mode: scheme
-;; End:
diff --git a/src/unit-tests/lexer/lexer.test.scm b/src/unit-tests/lexer/lexer.test.scm
deleted file mode 100644
index ce3520d..0000000
--- a/src/unit-tests/lexer/lexer.test.scm
+++ /dev/null
@@ -1,89 +0,0 @@
-;; -*- compile-command: "guile -L ./src/modules ./src/unit-tests/lexer/lexer.test.scm"; -*-
-(use-modules (srfi srfi-64)
- (lexer lexer))
-
-(define (read-this str)
- (set-current-input-port
- (open-input-string str))
- str)
-
-
-(test-begin "lexer-harness")
-
-
-(test-equal (read-this "")
- '()
- (read-tokens))
-
-(test-equal (read-this "(")
- '(left-paren)
- (read-tokens))
-
-(test-equal (read-this "((")
- '(left-paren left-paren)
- (read-tokens))
-
-(test-equal (read-this "( )")
- '(left-paren right-paren)
- (read-tokens))
-
-(test-equal (read-this "( {;} {((};})")
- '(left-paren open-brace semi-colon close-brace open-brace left-paren left-paren close-brace semi-colon close-brace right-paren)
- (read-tokens))
-
-(test-equal (read-this "1")
- '(1)
- (read-tokens))
-
-(test-equal (read-this "~+-")
- '(complement add sub)
- (read-tokens))
-
-(test-equal (read-this "---")
- '(decrement sub)
- (read-tokens))
-
-(test-equal (read-this "+--")
- '(add decrement)
- (read-tokens))
-
-(test-equal (read-this "0")
- '(0)
- (read-tokens))
-
-(test-equal (read-this "0011001")
- '(11001)
- (read-tokens))
-
-(test-equal (read-this "12 {34")
- '(12 open-brace 34)
- (read-tokens))
-
-(test-equal (read-this "34;")
- '(34 semi-colon)
- (read-tokens))
-
-(test-error (read-this "3.4")
- (read-tokens))
-
-(test-equal (read-this "a")
- '("a")
- (read-tokens))
-
-(test-equal (read-this "a_2")
- '("a_2")
- (read-tokens))
-
-(test-error (read-this "1foo")
- (read-tokens))
-
-(test-equal (read-this "void")
- '(void)
- (read-tokens))
-
-(test-equal (read-this "int main(void) {return 2;}")
- '(int "main" left-paren void right-paren open-brace return 2 semi-colon close-brace)
- (read-tokens))
-
-
-(test-end "lexer-harness")
diff --git a/src/unit-tests/parser/parser.test.scm b/src/unit-tests/parser/parser.test.scm
deleted file mode 100644
index 32e1e6c..0000000
--- a/src/unit-tests/parser/parser.test.scm
+++ /dev/null
@@ -1,30 +0,0 @@
-;; -*- compile-command: "guile -L ./src/modules ./src/unit-tests/parser/parser.test.scm"; -*-
-(use-modules (srfi srfi-64)
- (parser parser))
-
-
-(test-begin "parser-harness")
-
-
-(test-equal "trivial function main 2"
- '(program (function (identifier "main") (return (constant 2))))
- (p-program '(int "main" left-paren void right-paren open-brace return 2 semi-colon close-brace)))
-
-(test-equal "trivial function foo 4"
- '(program (function (identifier "foo") (return (constant 4))))
- (p-program '(int "foo" left-paren void right-paren open-brace return 4 semi-colon close-brace)))
-
-(test-error "trivial function bad double return"
- (p-program '(int "foo" left-paren void right-paren open-brace return return 4 semi-colon close-brace)))
-
-(test-error "trivial function bad parens"
- (p-program '(int "foo" right-paren void left-paren open-brace return 4 semi-colon close-brace)))
-
-(test-error "trivial function bad int parameter"
- (p-program '(int "foo" left-paren int right-paren open-brace return 4 semi-colon close-brace)))
-
-(test-error "trivial function incomplete function"
- (p-program '(int "foo" left-paren void right-paren open-brace return)))
-
-
-(test-end "parser-harness")
diff --git a/src/unit-tests/utils/t-factory.test.scm b/src/unit-tests/utils/t-factory.test.scm
deleted file mode 100644
index d780c17..0000000
--- a/src/unit-tests/utils/t-factory.test.scm
+++ /dev/null
@@ -1,19 +0,0 @@
-;; -*- compile-command: "guile -L ./src/modules ./src/unit-tests/utils/t-factory.test.scm"; -*-
-(use-modules (srfi srfi-64)
- (ast assembly-tree)
- (utils t-factory))
-
-
-(test-begin "t-factory-harness")
-
-
-(test-equal "make first temporary"
- 't.101
- (temporary-name (make-t)))
-
-(test-equal "make second temporary"
- 't.102
- (temporary-name (make-t)))
-
-
-(test-end "t-factory-harness")
diff --git a/ull b/ull
new file mode 100755
index 0000000..004d7e5
--- /dev/null
+++ b/ull
@@ -0,0 +1,127 @@
+#!/run/current-system/profile/bin/guile \
+-L ./src -e main -s
+!#
+
+(use-modules (ice-9 getopt-long)
+ (ice-9 popen)
+ (ice-9 pretty-print)
+ (frontend driver)
+ (backend tacky driver)
+ (backend generator driver)
+ (backend emitter driver))
+
+(define version "v0.1.1")
+
+(define (error message)
+ (display (string-concatenate `(,message "
+Usage:
+ ull [OPTIONS] file
+Options:
+ --version, -v: print version information
+ --debug, -d: turn on verbose output
+ --lex, -l: run the lexer, but stop before assembly generation
+ --parse, -p: run the lexer and parser, but stop before assembly generation
+ --tacky, -t: run the tacky generation stage, but stop before assembly generation
+ --codegen, -c: perform lexing, parsing, tacky, and assembly generation, but stop before code emission\n")))
+ (exit #f))
+
+(define (c-extension? file)
+ (let ((extension (string-drop file (- (string-length file) 2))))
+ (string=? extension ".c")))
+
+(define (cleanup-file file)
+ (when (file-exists? file)
+ (delete-file file)))
+
+(define (preprocess src dst)
+ "Preprocesses SRC with gcc and write it to DST."
+ (system (string-concatenate `("gcc -E -P " ,src " -o " ,dst)))
+ (display (string-concatenate `("Preprocess reported success (wrote " ,dst ").\n"))))
+
+(define (frontend file parse?)
+ "Wrapper for lexing and parsing. Returns an AST representing FILE."
+ (let ((c-ast (file->ast file parse?)))
+ (or c-ast
+ (begin
+ (display (format #f "~a reported failure!\n" (or (and parse? "Parser") "Lexer")))
+ (cleanup-file file)
+ (exit #f)))))
+
+(define (backend c-ast tack? generate? write?)
+ "Driver for tacky and assembly generation, code emission. Returns an assembly program representing C-AST."
+ (pretty-print c-ast)
+ (display "===^file->ast^===\n")
+ (when tack?
+ (let ((tacky-ast (ast->tacky c-ast)))
+ (pretty-print tacky-ast)
+ (display "===^ast->tacky^===\n")
+ (when generate?
+ (let ((assembly-ast (tacky->assembly tacky-ast)))
+ (pretty-print assembly-ast)
+ (display "===^tacky->assembly^===\n")
+ (when write?
+ (assembly->string assembly-ast)))))))
+
+(define (postprocess src dest)
+ "Assembles and links SRC, producing executable DEST.
+Returns #f on a failure, #t on a success."
+ (zero? (system (string-concatenate `("gcc " ,src " -o " ,dest)))))
+
+(define (write str dst)
+ "Writes STR to the file at DST."
+ (cleanup-file dst)
+ (let ((port (open-output-file dst)))
+ (display str port)
+ (close-port port))
+ (display (string-concatenate `("Assembly generation reported success (wrote " ,dst ").\n"))))
+
+(define (main args)
+ "Entry point for ull. Handles user args and performs initial validity check."
+ (let* ((option-spec
+ '((version (single-char #\v) (value #f))
+ (debug (single-char #\d) (value #f))
+ (lex (single-char #\l) (value #f))
+ (parse (single-char #\p) (value #f))
+ (tacky (single-char #\t) (value #f))
+ (codegen (single-char #\c) (value #f))))
+ (options (getopt-long args option-spec))
+ (rest (option-ref options '() #f))
+ (file-name (if (null? rest) #f (car rest))) )
+ (cond
+ ;; display version number
+ ((option-ref options 'version #f)
+ (display (string-concatenate `("ull (" ,version ")\n"))))
+
+ ;; complain about usage
+ ((not (equal? 1 (length rest))) (error "Wrong number of arguments."))
+ ((or (not file-name)
+ (not (access? file-name R_OK))
+ (not (equal? 'regular (stat:type (stat file-name))))
+ (not (c-extension? file-name))) (error "The file could not be read, or it is not a C source code file."))
+
+ (#t
+ (let ((parse? (not (option-ref options 'lex #f)))
+ (tack? (not (option-ref options 'parse #f)))
+ (generate? (not (option-ref options 'tacky #f)))
+ (write? (not (option-ref options 'codegen #f)))
+ (preprocessed-file-name
+ (string-concatenate `(,(string-drop-right file-name 1) "i")))
+ (assembly-file-name
+ (string-concatenate `(,(string-drop-right file-name 1) "s")))
+ (executable-file-name
+ (string-drop-right file-name 2)))
+
+ ;; preprocess the file
+ (preprocess file-name preprocessed-file-name)
+ ;; call the frontend
+ (let ((c-ast (frontend preprocessed-file-name parse?)))
+ (cleanup-file preprocessed-file-name)
+ (if (and parse? c-ast)
+ ;; call the backend
+ (begin (display "Parser reported success\n")
+ (let ((program (backend c-ast tack? generate? write?)))
+ (when write?
+ (write program assembly-file-name)
+ ;; call postprocessing
+ (postprocess assembly-file-name executable-file-name))))
+ (display "Tokenizer successful. "))))))))