summaryrefslogtreecommitdiff
path: root/src/frontend
diff options
context:
space:
mode:
Diffstat (limited to 'src/frontend')
-rw-r--r--src/frontend/driver.c88
-rw-r--r--src/frontend/driver.scm3
-rw-r--r--src/frontend/lexer.l47
-rw-r--r--src/frontend/node.c84
-rw-r--r--src/frontend/node.h33
-rw-r--r--src/frontend/parser.y116
6 files changed, 371 insertions, 0 deletions
diff --git a/src/frontend/driver.c b/src/frontend/driver.c
new file mode 100644
index 0000000..52eb3f4
--- /dev/null
+++ b/src/frontend/driver.c
@@ -0,0 +1,88 @@
+#include "lexer.h"
+#include "parser.h"
+#include "node.h"
+#include <stdio.h>
+#include <libguile.h>
+
+/**
+ * @param node a tree of node structs representing a C source file.
+ * @return a scheme representation of node.
+ */
+SCM
+node_to_scm(Node *node) {
+ SCM ret = scm_list_1(scm_from_locale_symbol(node_types[node->type]));
+
+ SCM field_value = SCM_BOOL_F;
+ switch (node->type) {
+ case FUNC:
+ field_value = scm_from_locale_string(node->field.name);
+ break;
+ case EXPR:
+ field_value = scm_from_locale_symbol(node_ops[node->field.op]);
+ break;
+ case CONST:
+ field_value = scm_from_int32(node->field.val);
+ break;
+ default:
+ ;
+ }
+
+ if (scm_is_true(field_value))
+ ret = scm_append(scm_list_2(ret, scm_list_1(field_value)));
+
+ SCM child;
+ for (size_t i = 0; i < node->num_children; ++i) {
+ child = node_to_scm(node->children[i]);
+ ret = scm_append(scm_list_2(ret, scm_list_1(child)));
+ }
+
+ return ret;
+}
+
+/** \brief Parser driver for ull.
+ * Given F, returns an AST of the program represented as a guile s-exp, or #f on a parse error.
+ * if DO_PARSE is false, only performs lexing, returning #t on a success.
+ * @param f a preprocessed C file to parse.
+ * @param do_parse on false, do not perform the parsing stage.
+ * @return a SCM list representing the parse tree, or #t if do_parse is #f and lexing is successful, or #f otherwise.
+ */
+SCM
+file_to_ast_wrapper(SCM f, SCM do_parse)
+{
+ char *file = scm_to_locale_string(f);
+ Node *root = NULL;
+ SCM ret = SCM_BOOL_F;
+
+ yyin = fopen(file, "r");
+ if (yyin != NULL){
+ if (scm_is_true(do_parse)) {
+ if (yyparse(&root) == 0) {
+ ret = node_to_scm(root);
+ free_node(root);
+ }
+ } else {
+ ret = SCM_BOOL_T;
+ int token;
+ while ((token = yylex()) != 0) {
+ if (token == YYerror)
+ ret = SCM_BOOL_F;
+ }
+ }
+ }
+
+ fclose(yyin);
+ free(file);
+ return ret;
+}
+
+void
+init_parser_driver()
+{
+ scm_c_define_gsubr("file->ast", 2, 0, 0, file_to_ast_wrapper);
+ scm_c_export("file->ast", NULL);
+}
+
+void
+scm_init_parser_driver_module() {
+ scm_c_define_module("frontend driver", init_parser_driver, NULL);
+}
diff --git a/src/frontend/driver.scm b/src/frontend/driver.scm
new file mode 100644
index 0000000..402e6e9
--- /dev/null
+++ b/src/frontend/driver.scm
@@ -0,0 +1,3 @@
+(define-module (frontend driver))
+
+(load-extension "./libull.so" "scm_init_parser_driver_module")
diff --git a/src/frontend/lexer.l b/src/frontend/lexer.l
new file mode 100644
index 0000000..5b72086
--- /dev/null
+++ b/src/frontend/lexer.l
@@ -0,0 +1,47 @@
+/*** definition section ***/
+%{
+ /* C to be copied verbatim */
+ #include "parser.h"
+%}
+
+/* read only one input file */
+%option noyywrap
+%option yylineno
+
+DIGIT [0-9]
+ALPHA [[:alpha:]_]
+
+/*** rules section ***/
+%%
+
+"(" {return L_PAREN;}
+")" {return R_PAREN;}
+"{" {return L_BRACK;}
+"}" {return R_BRACK;}
+";" {return SEMI_COL;}
+"~" {return COMP;}
+"*" {return MULT;}
+"/" {return DIV;}
+"%" {return MOD;}
+"-" {return MINUS;}
+"+" {return PLUS;}
+
+"int" {return INT;}
+"void" {return VOID;}
+"return" {return RET;}
+
+{DIGIT}+ {yylval.ival = atol(yytext); return NUMBER;}
+{ALPHA}+ {
+ yylval.sval = strdup(yytext);
+ return WORD;
+ }
+
+[[:space:]]+ {/* discard */}
+. {
+ printf("Error at line %d: unrecognized symbol \"%s\"\n", yylineno, yytext);
+ return YYerror;
+}
+
+%%
+
+/*** C section ***/
diff --git a/src/frontend/node.c b/src/frontend/node.c
new file mode 100644
index 0000000..344b48a
--- /dev/null
+++ b/src/frontend/node.c
@@ -0,0 +1,84 @@
+#include "node.h"
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+const char* node_types[] = {"prog", "func", "stmt", "expr", "const"};
+const char* node_ops[] = {"not", "neg", "plus", "minus", "mult", "div", "mod"};
+
+Node *create_node(enum node_type type) {
+ Node *node = malloc(sizeof(Node));
+ node->type = type;
+ node->children = NULL;
+ node->num_children = 0;
+ return node;
+}
+
+Node *create_function(char *name) {
+ Node *node = malloc(sizeof(Node));
+ node->type = FUNC;
+ node->field.name = name;
+ node->children = NULL;
+ node->num_children = 0;
+ return node;
+}
+
+Node *create_expr(enum node_op op) {
+ Node *node = malloc(sizeof(Node));
+ node->type = EXPR;
+ node->field.op = op;
+ node->children = NULL;
+ node->num_children = 0;
+ return node;
+}
+
+Node *create_const(int val) {
+ Node *node = malloc(sizeof(Node));
+ node->type = CONST;
+ node->field.val = val;
+ node->children = NULL;
+ node->num_children = 0;
+ return node;
+}
+
+void add_child(Node *parent, Node *child) {
+ size_t new_size = sizeof(Node*) * (parent->num_children + 1);
+ parent->children = realloc(parent->children, new_size);
+
+ parent->children[parent->num_children] = child;
+ parent->num_children++;
+}
+
+void free_node(Node *node) {
+ for (size_t i = 0; i < node->num_children; ++i) {
+ free_node(node->children[i]);
+ }
+ if (node->type == FUNC)
+ free(node->field.name);
+
+ free(node->children);
+ free(node);
+}
+
+void *print_node(Node *node, int indent) {
+ if (node == NULL) {
+ return;
+ }
+
+ for (int i = 0; i < indent; ++i)
+ printf(" ");
+
+ printf("type: %s", node_types[node->type]);
+ if (node->type == FUNC)
+ printf(", name: \"%s\"", node->field.name);
+ if (node->type == EXPR)
+ printf(", op: %s", node_ops[node->field.op]);
+ if (node->type == CONST)
+ printf(", val: %d", node->field.val);
+ printf("\n");
+
+ for (size_t i = 0; i < node->num_children; ++i) {
+ print_node(node->children[i], indent + 1);
+ }
+}
diff --git a/src/frontend/node.h b/src/frontend/node.h
new file mode 100644
index 0000000..71c8182
--- /dev/null
+++ b/src/frontend/node.h
@@ -0,0 +1,33 @@
+#ifndef NODE_H
+#define NODE_H
+
+#include <stddef.h>
+
+enum node_type{PROG, FUNC, STMT, EXPR, CONST};
+enum node_op{COMP_SYM, NEG_SYM, PLUS_SYM, MINUS_SYM, MULT_SYM, DIV_SYM, MOD_SYM};
+
+extern const char* node_types[];
+extern const char* node_ops[];
+
+typedef union {
+ char *name;
+ int val;
+ enum node_op op;
+} node_field;
+
+typedef struct Node {
+ enum node_type type;
+ node_field field;
+ struct Node **children;
+ size_t num_children;
+} Node;
+
+Node *create_node(enum node_type type);
+Node *create_function(char *name);
+Node *create_expr(enum node_op op);
+Node *create_const(int val);
+void add_child(Node *parent, Node *child);
+void free_node(Node *node);
+void *print_node(Node *node, int indent);
+
+#endif
diff --git a/src/frontend/parser.y b/src/frontend/parser.y
new file mode 100644
index 0000000..aa58f64
--- /dev/null
+++ b/src/frontend/parser.y
@@ -0,0 +1,116 @@
+/*** definition section ***/
+%{
+ /* C to be copied verbatim */
+ #include "lexer.h"
+ #include "node.h"
+ void yyerror(Node **root, const char *msg);
+%}
+
+%code requires {
+ #include "node.h"
+}
+
+%union {
+ Node *node;
+ int ival;
+ char *sval;
+};
+
+%locations
+%start input
+%parse-param {Node **root}
+
+%token L_PAREN R_PAREN L_BRACK R_BRACK SEMI_COL COMP MULT DIV MOD MINUS PLUS INT VOID RET
+%token <sval> WORD
+%token <ival> NUMBER
+
+%type <node> input
+%type <node> func
+%type <node> stmt
+%type <node> exp
+%type <node> term
+%type <node> factor
+%type <ival> un_op
+
+
+/*** rules section ***/
+%%
+input: func {
+ *root = create_node(PROG);
+ add_child(*root, $1);
+ }
+;
+
+func: INT WORD L_PAREN VOID R_PAREN L_BRACK stmt R_BRACK {
+ $$ = create_function($2);
+ add_child($$, $7);
+ }
+;
+
+stmt: RET exp SEMI_COL {
+ $$ = create_node(STMT);
+ add_child($$, $2);
+ }
+;
+
+exp: term {
+ $$ = $1;
+ }
+| exp PLUS term {
+ $$ = create_expr(PLUS_SYM);
+ add_child($$, $1);
+ add_child($$, $3);
+ }
+| exp MINUS term {
+ $$ = create_expr(MINUS_SYM);
+ add_child($$, $1);
+ add_child($$, $3);
+ }
+| un_op exp {
+ $$ = create_expr($1);
+ add_child($$, $2);
+ }
+;
+
+term: factor {
+ $$ = $1;
+ }
+| term MULT factor {
+ $$ = create_expr(MULT_SYM);
+ add_child($$, $1);
+ add_child($$, $3);
+ }
+| term DIV factor {
+ $$ = create_expr(DIV_SYM);
+ add_child($$, $1);
+ add_child($$, $3);
+ }
+| term MOD factor {
+ $$ = create_expr(MOD_SYM);
+ add_child($$, $1);
+ add_child($$, $3);
+ }
+;
+
+factor: NUMBER {
+ $$ = create_const($1);
+ }
+| L_PAREN exp R_PAREN {
+ $$ = $2;
+ }
+;
+
+un_op: COMP {
+ $$ = COMP_SYM;
+ }
+| MINUS {
+ $$ = NEG_SYM;
+ }
+;
+
+%%
+
+void yyerror(Node **root, const char *msg) {
+ printf("** Line %d: %s\n", yylineno, msg);
+}
+