diff options
Diffstat (limited to 'src/frontend')
-rw-r--r-- | src/frontend/driver.c | 88 | ||||
-rw-r--r-- | src/frontend/driver.scm | 3 | ||||
-rw-r--r-- | src/frontend/lexer.l | 47 | ||||
-rw-r--r-- | src/frontend/node.c | 84 | ||||
-rw-r--r-- | src/frontend/node.h | 33 | ||||
-rw-r--r-- | src/frontend/parser.y | 116 |
6 files changed, 371 insertions, 0 deletions
diff --git a/src/frontend/driver.c b/src/frontend/driver.c new file mode 100644 index 0000000..52eb3f4 --- /dev/null +++ b/src/frontend/driver.c @@ -0,0 +1,88 @@ +#include "lexer.h" +#include "parser.h" +#include "node.h" +#include <stdio.h> +#include <libguile.h> + +/** + * @param node a tree of node structs representing a C source file. + * @return a scheme representation of node. + */ +SCM +node_to_scm(Node *node) { + SCM ret = scm_list_1(scm_from_locale_symbol(node_types[node->type])); + + SCM field_value = SCM_BOOL_F; + switch (node->type) { + case FUNC: + field_value = scm_from_locale_string(node->field.name); + break; + case EXPR: + field_value = scm_from_locale_symbol(node_ops[node->field.op]); + break; + case CONST: + field_value = scm_from_int32(node->field.val); + break; + default: + ; + } + + if (scm_is_true(field_value)) + ret = scm_append(scm_list_2(ret, scm_list_1(field_value))); + + SCM child; + for (size_t i = 0; i < node->num_children; ++i) { + child = node_to_scm(node->children[i]); + ret = scm_append(scm_list_2(ret, scm_list_1(child))); + } + + return ret; +} + +/** \brief Parser driver for ull. + * Given F, returns an AST of the program represented as a guile s-exp, or #f on a parse error. + * if DO_PARSE is false, only performs lexing, returning #t on a success. + * @param f a preprocessed C file to parse. + * @param do_parse on false, do not perform the parsing stage. + * @return a SCM list representing the parse tree, or #t if do_parse is #f and lexing is successful, or #f otherwise. + */ +SCM +file_to_ast_wrapper(SCM f, SCM do_parse) +{ + char *file = scm_to_locale_string(f); + Node *root = NULL; + SCM ret = SCM_BOOL_F; + + yyin = fopen(file, "r"); + if (yyin != NULL){ + if (scm_is_true(do_parse)) { + if (yyparse(&root) == 0) { + ret = node_to_scm(root); + free_node(root); + } + } else { + ret = SCM_BOOL_T; + int token; + while ((token = yylex()) != 0) { + if (token == YYerror) + ret = SCM_BOOL_F; + } + } + } + + fclose(yyin); + free(file); + return ret; +} + +void +init_parser_driver() +{ + scm_c_define_gsubr("file->ast", 2, 0, 0, file_to_ast_wrapper); + scm_c_export("file->ast", NULL); +} + +void +scm_init_parser_driver_module() { + scm_c_define_module("frontend driver", init_parser_driver, NULL); +} diff --git a/src/frontend/driver.scm b/src/frontend/driver.scm new file mode 100644 index 0000000..402e6e9 --- /dev/null +++ b/src/frontend/driver.scm @@ -0,0 +1,3 @@ +(define-module (frontend driver)) + +(load-extension "./libull.so" "scm_init_parser_driver_module") diff --git a/src/frontend/lexer.l b/src/frontend/lexer.l new file mode 100644 index 0000000..5b72086 --- /dev/null +++ b/src/frontend/lexer.l @@ -0,0 +1,47 @@ +/*** definition section ***/ +%{ + /* C to be copied verbatim */ + #include "parser.h" +%} + +/* read only one input file */ +%option noyywrap +%option yylineno + +DIGIT [0-9] +ALPHA [[:alpha:]_] + +/*** rules section ***/ +%% + +"(" {return L_PAREN;} +")" {return R_PAREN;} +"{" {return L_BRACK;} +"}" {return R_BRACK;} +";" {return SEMI_COL;} +"~" {return COMP;} +"*" {return MULT;} +"/" {return DIV;} +"%" {return MOD;} +"-" {return MINUS;} +"+" {return PLUS;} + +"int" {return INT;} +"void" {return VOID;} +"return" {return RET;} + +{DIGIT}+ {yylval.ival = atol(yytext); return NUMBER;} +{ALPHA}+ { + yylval.sval = strdup(yytext); + return WORD; + } + +[[:space:]]+ {/* discard */} +. { + printf("Error at line %d: unrecognized symbol \"%s\"\n", yylineno, yytext); + return YYerror; +} + +%% + +/*** C section ***/ diff --git a/src/frontend/node.c b/src/frontend/node.c new file mode 100644 index 0000000..344b48a --- /dev/null +++ b/src/frontend/node.c @@ -0,0 +1,84 @@ +#include "node.h" +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +const char* node_types[] = {"prog", "func", "stmt", "expr", "const"}; +const char* node_ops[] = {"not", "neg", "plus", "minus", "mult", "div", "mod"}; + +Node *create_node(enum node_type type) { + Node *node = malloc(sizeof(Node)); + node->type = type; + node->children = NULL; + node->num_children = 0; + return node; +} + +Node *create_function(char *name) { + Node *node = malloc(sizeof(Node)); + node->type = FUNC; + node->field.name = name; + node->children = NULL; + node->num_children = 0; + return node; +} + +Node *create_expr(enum node_op op) { + Node *node = malloc(sizeof(Node)); + node->type = EXPR; + node->field.op = op; + node->children = NULL; + node->num_children = 0; + return node; +} + +Node *create_const(int val) { + Node *node = malloc(sizeof(Node)); + node->type = CONST; + node->field.val = val; + node->children = NULL; + node->num_children = 0; + return node; +} + +void add_child(Node *parent, Node *child) { + size_t new_size = sizeof(Node*) * (parent->num_children + 1); + parent->children = realloc(parent->children, new_size); + + parent->children[parent->num_children] = child; + parent->num_children++; +} + +void free_node(Node *node) { + for (size_t i = 0; i < node->num_children; ++i) { + free_node(node->children[i]); + } + if (node->type == FUNC) + free(node->field.name); + + free(node->children); + free(node); +} + +void *print_node(Node *node, int indent) { + if (node == NULL) { + return; + } + + for (int i = 0; i < indent; ++i) + printf(" "); + + printf("type: %s", node_types[node->type]); + if (node->type == FUNC) + printf(", name: \"%s\"", node->field.name); + if (node->type == EXPR) + printf(", op: %s", node_ops[node->field.op]); + if (node->type == CONST) + printf(", val: %d", node->field.val); + printf("\n"); + + for (size_t i = 0; i < node->num_children; ++i) { + print_node(node->children[i], indent + 1); + } +} diff --git a/src/frontend/node.h b/src/frontend/node.h new file mode 100644 index 0000000..71c8182 --- /dev/null +++ b/src/frontend/node.h @@ -0,0 +1,33 @@ +#ifndef NODE_H +#define NODE_H + +#include <stddef.h> + +enum node_type{PROG, FUNC, STMT, EXPR, CONST}; +enum node_op{COMP_SYM, NEG_SYM, PLUS_SYM, MINUS_SYM, MULT_SYM, DIV_SYM, MOD_SYM}; + +extern const char* node_types[]; +extern const char* node_ops[]; + +typedef union { + char *name; + int val; + enum node_op op; +} node_field; + +typedef struct Node { + enum node_type type; + node_field field; + struct Node **children; + size_t num_children; +} Node; + +Node *create_node(enum node_type type); +Node *create_function(char *name); +Node *create_expr(enum node_op op); +Node *create_const(int val); +void add_child(Node *parent, Node *child); +void free_node(Node *node); +void *print_node(Node *node, int indent); + +#endif diff --git a/src/frontend/parser.y b/src/frontend/parser.y new file mode 100644 index 0000000..aa58f64 --- /dev/null +++ b/src/frontend/parser.y @@ -0,0 +1,116 @@ +/*** definition section ***/ +%{ + /* C to be copied verbatim */ + #include "lexer.h" + #include "node.h" + void yyerror(Node **root, const char *msg); +%} + +%code requires { + #include "node.h" +} + +%union { + Node *node; + int ival; + char *sval; +}; + +%locations +%start input +%parse-param {Node **root} + +%token L_PAREN R_PAREN L_BRACK R_BRACK SEMI_COL COMP MULT DIV MOD MINUS PLUS INT VOID RET +%token <sval> WORD +%token <ival> NUMBER + +%type <node> input +%type <node> func +%type <node> stmt +%type <node> exp +%type <node> term +%type <node> factor +%type <ival> un_op + + +/*** rules section ***/ +%% +input: func { + *root = create_node(PROG); + add_child(*root, $1); + } +; + +func: INT WORD L_PAREN VOID R_PAREN L_BRACK stmt R_BRACK { + $$ = create_function($2); + add_child($$, $7); + } +; + +stmt: RET exp SEMI_COL { + $$ = create_node(STMT); + add_child($$, $2); + } +; + +exp: term { + $$ = $1; + } +| exp PLUS term { + $$ = create_expr(PLUS_SYM); + add_child($$, $1); + add_child($$, $3); + } +| exp MINUS term { + $$ = create_expr(MINUS_SYM); + add_child($$, $1); + add_child($$, $3); + } +| un_op exp { + $$ = create_expr($1); + add_child($$, $2); + } +; + +term: factor { + $$ = $1; + } +| term MULT factor { + $$ = create_expr(MULT_SYM); + add_child($$, $1); + add_child($$, $3); + } +| term DIV factor { + $$ = create_expr(DIV_SYM); + add_child($$, $1); + add_child($$, $3); + } +| term MOD factor { + $$ = create_expr(MOD_SYM); + add_child($$, $1); + add_child($$, $3); + } +; + +factor: NUMBER { + $$ = create_const($1); + } +| L_PAREN exp R_PAREN { + $$ = $2; + } +; + +un_op: COMP { + $$ = COMP_SYM; + } +| MINUS { + $$ = NEG_SYM; + } +; + +%% + +void yyerror(Node **root, const char *msg) { + printf("** Line %d: %s\n", yylineno, msg); +} + |