From 8ceaf87c43d412b758bb7562364f7ae8bee913ae Mon Sep 17 00:00:00 2001 From: bd Date: Tue, 8 Apr 2025 21:41:35 -0400 Subject: Add parsing functionality for R-type expressions --- src/main.lisp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/main.lisp') diff --git a/src/main.lisp b/src/main.lisp index f6e5754..8a36a66 100644 --- a/src/main.lisp +++ b/src/main.lisp @@ -47,8 +47,14 @@ _/_/ _/_/ " (t (let ((tokens (lex:file->tokens file))) (if tokens (progn (pprint tokens) - (terpri)) + (terpri) + (format t "---~%")) (error "The file does not exist, or it could not be opened.~%")) + (if parse? + (let ((tokens (parse:tokens->ast tokens))) + (progn (pprint tokens) + (terpri) + (format t "---~%")))) (format t "Nitimur in Vetitum~%")))))) -- cgit v1.2.3 From ce80845b5cb5b473327e4e561342490576d83a90 Mon Sep 17 00:00:00 2001 From: bd Date: Wed, 9 Apr 2025 20:25:57 -0400 Subject: Successfully parse every instruction type --- input/add-loop.asm | 13 ++++--- rva.asd | 4 +- src/lex.lisp | 110 ----------------------------------------------------- src/main.lisp | 21 ++-------- src/package.lisp | 12 +----- src/parse.lisp | 95 +++++++++++++++++++++++++-------------------- t/lex.lisp | 104 -------------------------------------------------- t/parse.lisp | 24 +++++++----- 8 files changed, 81 insertions(+), 302 deletions(-) delete mode 100644 src/lex.lisp delete mode 100644 t/lex.lisp (limited to 'src/main.lisp') diff --git a/input/add-loop.asm b/input/add-loop.asm index 6379831..f8d97a1 100644 --- a/input/add-loop.asm +++ b/input/add-loop.asm @@ -1,17 +1,18 @@ - addi $fp $0 0x200 +.text + addi $2 $0 0x200 addi $5 $0 0x1 - store $5 0($fp) + store $5 0($2) addi $5 $0 0x2 - store $5 1($fp) + store $5 1($2) addi $5 $0 0x3 - store $5 2($fp) + store $5 2($2) addi $5 $0 0x4 - store $5 3($fp) + store $5 3($2) addi $5 $0 0x0 addi $6 $0 0x3 jrl CHECK LOOP: - add $9 $fp $5 + add $9 $2 $5 load $7 -0($9) load $8 +1($9) add $7 $7 $8 diff --git a/rva.asd b/rva.asd index dd904fb..9271472 100644 --- a/rva.asd +++ b/rva.asd @@ -15,8 +15,7 @@ :serial t :components ((:file "package") (:file "util") - (:file "lex") - (:file "parse") + (:file "parse") (:file "main")))) :long-description #.(uiop:read-file-string @@ -37,7 +36,6 @@ :components ((:file "package") (:file "main") (:file "util") - (:file "lex") (:file "parse")))) :perform (test-op (o s) (uiop:symbol-call :rva-tests :test-rva))) diff --git a/src/lex.lisp b/src/lex.lisp deleted file mode 100644 index c86d17c..0000000 --- a/src/lex.lisp +++ /dev/null @@ -1,110 +0,0 @@ -(in-package #:lex) - -(define-condition lexer-error (error) - ((message :initarg :message - :initform nil - :reader message)) - (:report (lambda (condition stream) - (format stream "~A" (message condition)))) - (:documentation "Dedicated error for an invalid lex.")) - -(defun file->tokens (file) - "Opens FILE and parses returns a list of tokens, or -NIL if the file could not be opened." - - (defun read-instr (lst tokens-so-far) - "Collects tokens in FILE into TOKENS-SO-FAR, splitting on a newline." - (let ((token (read-token))) - (cond ((null token) (reverse tokens-so-far)) - ((eq token 'nl) - (cons (reverse tokens-so-far) (read-instr nil nil))) - (t (read-instr lst (cons token tokens-so-far)))))) - - (and (probe-file file) - (with-open-file (*standard-input* file :direction :input) - (remove nil (read-instr '() '()))))) - -(defun read-token () - "Reads *STANDARD-INPUT* and returns a token, or nil if the end -of file has been reached. -Whitespace, commas, colons, and parentheses are token delimiters. -Comments start with a semi-colon ';' and all tokens after are ignored." - (let ((chr (read-char *standard-input* nil))) - (cond - ((null chr) chr) - - ((char= chr #\linefeed) 'nl) - - ((whitespace-char-p chr) - (read-token)) - - ((char= chr #\;) - (progn (read-line *standard-input* nil) - 'nl)) - - ((char= chr #\.) 'period) - - ((char= chr #\() 'left-paren) - ((char= chr #\)) 'right-paren) - - ((char= chr #\:) 'colon) - ((char= chr #\$) 'dollar) - - ((char= chr #\+) 'plus) - ((char= chr #\-) 'minus) - - ((digit-char-p chr) - (read-immediate chr)) - - ((alpha-char-p chr) - (read-keyword chr)) - - (t (error 'lexer-error - :message - (format nil "LEX failled--~a is not a valid lexical symbol.~%" chr)))))) - -(defun read-immediate (chr) - "Reads a sequence of digits, in base 2, 8, 10, or 16.. Throws -`invalid-immediate-or-keyword' error if an alphabetic character is encountered." - ;; may be combined with read-keyword-helper - (defun read-immediate-helper (chrs-so-far) - (let ((chr (peek-char nil *standard-input* nil))) - (cond ((and (not (null chr)) (digit-char-p chr)) - (read-immediate-helper (cons (read-char *standard-input* nil) chrs-so-far))) - ((and (not (null chr)) (alpha-char-p chr)) - (error 'lexer-error - :message - (format nil "LEX failed--encountered ~a while reading immediate.~%" chr))) - (t (reverse chrs-so-far))))) - - (let* ((next (peek-char nil *standard-input* nil)) - (radix (cond ((null next) 10) - ((char= next #\b) 2) - ((char= next #\o) 8) - ((char= next #\x) 16) - ((alpha-char-p next) nil) - (t 10))) - (arg (list chr))) - (when (and (char= chr #\0) radix (not (= radix 10))) - (read-char *standard-input* nil) - (setq arg '())) - (parse-integer (coerce (read-immediate-helper arg) 'string) :radix radix))) - -(defun read-keyword (chr) - "Reads a sequence of alphabetic characters. Throws `invalid-immediate-or-keyword' -error if a digit is encountered." - ;; may be combined with read-immediate-helper - (defun read-keyword-helper (chrs-so-far) - (let ((chr (peek-char nil *standard-input* nil))) - (cond ((and (not (null chr)) (alpha-char-p chr)) - (read-keyword-helper (cons (read-char *standard-input* nil) chrs-so-far))) - ((and (not (null chr)) (digit-char-p chr)) - (error 'lexer-error - :message - (format nil "LEX failed--encountered ~a while reading keyword.~%" chr))) - (t (reverse chrs-so-far))))) - (coerce (read-keyword-helper (list chr)) 'string)) - -(defun whitespace-char-p (x) - (or (char= #\space x) - (not (graphic-char-p x)))) diff --git a/src/main.lisp b/src/main.lisp index 8a36a66..9180d57 100644 --- a/src/main.lisp +++ b/src/main.lisp @@ -17,13 +17,6 @@ _/_/ _/_/ " (defun cli/options () "Returns options for the `rva' assembler." (list - (clingon:make-option - :flag - :description "run the lexer, but stop before parsing" - :long-name "lex" - :short-name #\l - :required nil - :key :lex) (clingon:make-option :flag :description "run the parser, but stop before emission" @@ -37,24 +30,18 @@ _/_/ _/_/ " (print-splash) (let* ((args (clingon:command-arguments cmd)) (file (car args)) - (parse? (not (clingon:getopt cmd :lex))) - (emit? (not (clingon:getopt cmd :parse)))) + (emit? (not (clingon:getopt cmd :parse)))) (cond ;; complain about num arguments ((/= (length args) 1) (error "Wrong number of arguments.~%")) ((not (util:asm-extension? file)) (error "The file is not an asm source code file.~%")) - (t (let ((tokens (lex:file->tokens file))) - (if tokens - (progn (pprint tokens) + (t (let ((str (uiop:read-file-string file))) + (if str + (progn (pprint (esrap:parse 'parse:text (string-upcase str))) (terpri) (format t "---~%")) (error "The file does not exist, or it could not be opened.~%")) - (if parse? - (let ((tokens (parse:tokens->ast tokens))) - (progn (pprint tokens) - (terpri) - (format t "---~%")))) (format t "Nitimur in Vetitum~%")))))) diff --git a/src/package.lisp b/src/package.lisp index b856e38..2b01d15 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -10,18 +10,10 @@ #:iota #:riffle)) -(defpackage #:lex - (:use #:cl) - (:export #:lexer-error - #:file->tokens - ;; exported for testing only - #:read-token)) - (defpackage #:parse (:use #:cl) - (:export #:parser-error - #:tokens->ast - ;; exported for testing only + (:export ;; exported for testing only + #:text #:register #:instr )) diff --git a/src/parse.lisp b/src/parse.lisp index 54b0709..a92eae7 100644 --- a/src/parse.lisp +++ b/src/parse.lisp @@ -1,22 +1,24 @@ (in-package #:parse) (esrap:defrule space - (+ (or #\Space #\Tab)) + (+ (or #\space #\tab)) (:constant nil)) +(esrap:defrule newline + (+ #\newline)) + ;;; defines rules to parse an integer in various bases -(esrap:defrule binary (and #\0 #\b (+ (or "0" "1"))) +(esrap:defrule binary (and #\0 #\B (+ (or "0" "1"))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 2))) -(esrap:defrule octal (and #\0 #\o (+ (or (esrap:character-ranges (#\0 #\7))))) +(esrap:defrule octal (and #\0 #\O (+ (or (esrap:character-ranges (#\0 #\7))))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 8))) (esrap:defrule decimal (+ (or (esrap:character-ranges (#\0 #\9)))) (:lambda (list) (parse-integer (esrap:text list) :radix 10))) -(esrap:defrule hex (and #\0 #\x (+ (or (esrap:character-ranges (#\0 #\9)) - "a" "b" "c" "d" "e" "f" +(esrap:defrule hex (and #\0 #\X (+ (or (esrap:character-ranges (#\0 #\9)) "A" "B" "C" "D" "E" "F"))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 16))) @@ -30,7 +32,10 @@ (esrap:defrule dereference (and (esrap:? (or #\+ #\-)) int #\( register #\)) (:destructure (s i1 w1 r w2) (declare (ignore w1 w2)) - (list r (if (and s (string= s "-")) (- i1) i1)))) + (list r (list 'imm (if (and s (string= s "-")) (- i1) i1))))) + +(esrap:defrule immediate int + (:lambda (i) (list 'imm i))) ;;; defines rules to parse labels @@ -56,6 +61,11 @@ (esrap:defrule j-type-3-m (or "PUSH" "POP")) (defmacro defrule-instr (name type-id order &rest destructure-pattern) + "Defines the boilerplate for a common esrap instruction rule. +NAME is the name of the non-terminal symbol. +TYPE-ID is the symbol which appears as the first element of a successful parse. +ORDER is the order to place the parsed tokens in the resulting list. +DESTRUCTURE-PATTERN is the list of non-terminals on the right side of the grammar rule." (let* ((pattern-size (length destructure-pattern)) (spaces (mapcar (lambda (x) (read-from-string (format nil "w~A" x))) (util:iota pattern-size))) (vars (mapcar (lambda (x) (read-from-string (format nil "s~A" x))) (util:iota pattern-size)))) @@ -68,40 +78,41 @@ (defrule-instr r-type-1 'r (1 2 0) register register register) (defrule-instr r-type-2 'r (1 2 0) register register) (defrule-instr r-type-3 'r (0 1 2) register register) -;;(defrule-instr j-type-1 'j (0 1) dereference) + +(esrap:defrule i-type-1 (and i-type-1-m space register space dereference) + (:destructure (m w1 s w2 di) + (declare (ignore w1 w2)) + `(i ,m ,s ,@di))) + +(esrap:defrule i-type-2 (and i-type-2-m space register space dereference) + (:destructure (m w1 s w2 di) + (declare (ignore w1 w2)) + `(i ,m ,@(util:insert-in-middle di s)))) + +(defrule-instr i-type-3 'i (0 1 2) register register immediate) +(esrap:defrule j-type-1 (and j-type-1-m space dereference) + (:destructure (m w di) + (declare (ignore w)) + `(j ,m ,@di))) + (defrule-instr j-type-2 'j (1 0) label) -(defrule-instr j-type-3 'j (0 1) register) - -;; (esrap:defrule i-type-1 (and i-type-1-m space register space dereference) -;; (:destructure (m w1 s w2 di) -;; (declare (ignore w1 w2)) -;; `(i ,m ,s ,@di))) -;; (esrap:defrule i-type-2 (and i-type-2-m space register space dereference) -;; (:destructure (m w1 s w2 di) -;; (declare (ignore w1 w2)) -;; `(i ,m ,@(util:insert-in-middle di s)))) -;; (esrap:defrule i-type-3 (and i-type-3-m space register space register space int) -;; (:destructure (m w1 s1 w2 s2 w3 i) -;; (declare (ignore w1 w2 w3)) -;; (list i m s1 s2 i))) -;; (esrap:defrule j-type-1 (and j-type-1-m space dereference) -;; (:destructure (m w di) -;; (declare (ignore w)) -;; `(j ,m ,@di))) -;; (esrap:defrule j-type-2 (and j-type-2-m space label) -;; (:destructure (m w label) -;; (declare (ignore w)) -;; (list j m "00000" label))) -;; (esrap:defrule j-type-3 (and j-type-3-m space register) -;; (:destructure (m w r) -;; (declare (ignore w)) -;; (list j m r "00000"))) - -(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 j-type-2 j-type-3)) - - -;; (esrap:parse 'register "$3") - -;; (esrap:parse 'j-type-1 "JMP 3($3)") -;; (esrap:parse 'j-type-2 "JRL FOO") -;; (esrap:parse 'j-type-3 "PUSH $1") +(esrap:defrule j-type-3 (and j-type-3-m space register) + (:destructure (m w r) + (declare (ignore w)) + `(j ,m ,r (imm 0)))) + +(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 i-type-1 i-type-2 + i-type-3 j-type-1 j-type-2 j-type-3 label-decl)) + +;;; defines rules to parse the .text segment + +(esrap:defrule instr-clean (and (esrap:? space) instr newline) + (:destructure (w1 i w2) + (declare (ignore nl l)) + i)) + +(esrap:defrule text (and ".TEXT" newline + (* instr-clean)) + (:destructure (txt nl is) + (declare (ignore txt nl)) + (list 'text is))) diff --git a/t/lex.lisp b/t/lex.lisp deleted file mode 100644 index 98d7c28..0000000 --- a/t/lex.lisp +++ /dev/null @@ -1,104 +0,0 @@ -(in-package #:rva-tests) - -(defmacro read-this (str &body body) - `(let ((*standard-input* (make-string-input-stream ,str))) - ,@body)) - -(def-suite lex-tests - :description "Test functions exported from the lexer." - :in all-tests) - -(in-suite lex-tests) - -(test read-token-reads-eof - (read-this "" - (is (not (lex:read-token))))) - -(test read-token-reads-nl - (read-this " -" - (is (eq (lex:read-token) 'lex::nl)))) - -(test read-token-reads-left-paren - (read-this "." - (is (eq (lex:read-token) 'lex::period)))) - -(test read-token-reads-left-paren - (read-this "(" - (is (eq (lex:read-token) 'lex::left-paren)))) - -(test read-token-reads-right-paren - (read-this ")" - (is (eq (lex:read-token) 'lex::right-paren)))) - -(test read-token-reads-left-paren - (read-this "$" - (is (eq (lex:read-token) 'lex::dollar)))) - -(test read-token-reads-plus - (read-this "+" - (is (eq (lex:read-token) 'lex::plus)))) - -(test read-token-reads-minus - (read-this "-" - (is (eq (lex:read-token) 'lex::minus)))) - -(test read-token-ignores-space - (read-this " (" - (is (eq (lex:read-token) 'lex::left-paren)))) - -(test read-token-ignores-tab - (read-this " (" - (is (eq (lex:read-token) 'lex::left-paren)))) - -(test read-token-ignores-comment - (read-this "; this is a comment -(" - (is (eq (lex:read-token) 'lex::nl)))) - -(test read-token-immediate-zero - (read-this "0" - (is (= (lex:read-token) 0)))) - -(test read-token-immediate-all-digits - (read-this "123456789" - (is (= (lex:read-token) 123456789)))) - -(test read-token-immediate-binary - (read-this "0b00101010" - (is (= (lex:read-token) 42)))) - -(test read-token-immediate-octal - (read-this "0o052" - (is (= (lex:read-token) 42)))) - -(test read-token-immediate-hexadecimal - (read-this "0x200" - (is (= (lex:read-token) 512)))) - -(test read-token-immediate-invalid-immediate - (handler-case - (progn (read-this "0v0" (lex:read-token)) - (fail)) - (lex:lexer-error ()))) - -;; do we want a custom error for this too? -(test read-token-immediate-radix - (handler-case - (progn (read-this "0x" (lex:read-token)) - (fail)) - (sb-int:simple-parse-error ()))) - -(test read-token-keyword-single - (read-this "a" - (is (string= (lex:read-token) "a")))) - -(test read-token-keyword-add - (read-this "addi" - (is (string= (lex:read-token) "addi")))) - -(test read-token-immediate-invalid-keyword - (handler-case - (progn (read-this "sub0" (lex:read-token)) - (fail)) - (lex:lexer-error ()))) diff --git a/t/parse.lisp b/t/parse.lisp index 77db380..a9de6db 100644 --- a/t/parse.lisp +++ b/t/parse.lisp @@ -37,22 +37,26 @@ (is (equal '(parse::r "CMP" (parse::rr 1) (parse::rr 5) (parse::rr 0)) (esrap:parse 'parse:instr "CMP $1 $5")))) -;; (test esrap-i-type-1 -;; (is (equal (list 'parse::i "LOAD" (list 'parse::rr 8) (list 'parse::rr 9) (list 'parse::r 1)) -;; (esrap:parse 'parse:instr "LOAD $8 1($9)")))) +(test esrap-i-type-1 + (is (equal '(parse::i "LOAD" (parse::rr 8) (parse::rr 9) (parse::imm 1)) + (esrap:parse 'parse:instr "LOAD $8 1($9)")))) -;; (test esrap-i-type-2 -;; (is (equal (list 'parse::i "STORE" (list 'parse::rr 3) (list 'parse::rr 5) (list 'parse::rr 3)) -;; (esrap:parse 'parse:instr "STORE $5 3($3)")))) +(test esrap-i-type-2 + (is (equal '(parse::i "STORE" (parse::rr 3) (parse::rr 5) (parse::imm 3)) + (esrap:parse 'parse:instr "STORE $5 3($3)")))) -;; (test esrap-i-type-3 -;; (is (equal (list 'parse::i "ORI" (list 'parse::rr 5) (list 'parse::rr 4) (list 'parse::r 2)) -;; (esrap:parse 'parse:instr "ORI $5 $4 2")))) +(test esrap-i-type-3 + (is (equal '(parse::i "ORI" (parse::rr 5) (parse::rr 4) (parse::imm 2)) + (esrap:parse 'parse:instr "ORI $5 $4 2")))) + +(test esrap-j-type-1 + (is (equal '(parse::j "JMP" (parse::rr 3) (parse::imm 3)) + (esrap:parse 'parse:instr "JMP 3($3)")))) (test esrap-j-type-2 (is (equal '(parse::j "JRL" (parse::rr 0) (parse::l "FOO")) (esrap:parse 'parse:instr "JRL FOO")))) (test esrap-j-type-3 - (is (equal '(parse::j "PUSH" (parse::rr 1) 0) + (is (equal '(parse::j "PUSH" (parse::rr 1) (parse::imm 0)) (esrap:parse 'parse:instr "PUSH $1")))) -- cgit v1.2.3 From e50d44464db7eb8e1c20755c862466ac8f7419b0 Mon Sep 17 00:00:00 2001 From: bd Date: Wed, 9 Apr 2025 23:45:16 -0400 Subject: Properly maintain a hashmap of labels, lots of minor code cleanups --- src/main.lisp | 7 +++-- src/package.lisp | 11 ++++++- src/parse.lisp | 89 ++++++++++++++++++++++++++++++++++---------------------- src/util.lisp | 71 +++++++++++++++++++++++++++++++++++--------- t/parse.lisp | 12 ++++---- 5 files changed, 132 insertions(+), 58 deletions(-) (limited to 'src/main.lisp') diff --git a/src/main.lisp b/src/main.lisp index 9180d57..be9f69a 100644 --- a/src/main.lisp +++ b/src/main.lisp @@ -1,6 +1,6 @@ (in-package #:rva) -(defparameter *banner* +(defvar *banner* " _/_/ _/_/ _/ _/ _/ _/ _/_/ _/ _/ _/_/_/ _/ @@ -30,7 +30,7 @@ _/_/ _/_/ " (print-splash) (let* ((args (clingon:command-arguments cmd)) (file (car args)) - (emit? (not (clingon:getopt cmd :parse)))) + (emit? (not (clingon:getopt cmd :parse)))) (cond ;; complain about num arguments ((/= (length args) 1) (error "Wrong number of arguments.~%")) @@ -38,8 +38,9 @@ _/_/ _/_/ " (error "The file is not an asm source code file.~%")) (t (let ((str (uiop:read-file-string file))) (if str - (progn (pprint (esrap:parse 'parse:text (string-upcase str))) + (progn (pprint (esrap:parse 'parse:text (string-upcase str))) (terpri) + (maphash #'(lambda (k v) (format t "~A => ~A~%" k v)) util:label-table) (format t "---~%")) (error "The file does not exist, or it could not be opened.~%")) (format t "Nitimur in Vetitum~%")))))) diff --git a/src/package.lisp b/src/package.lisp index 2b01d15..efa617c 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -8,7 +8,16 @@ #:format-as-binary #:insert-in-middle #:iota - #:riffle)) + #:riffle + #:add-variable + #:add-label + #:get-variable + #:get-label + #:label-table + #:r-type + #:i-type + #:j-type + #:mnemonic-loc)) (defpackage #:parse (:use #:cl) diff --git a/src/parse.lisp b/src/parse.lisp index a92eae7..f9ede20 100644 --- a/src/parse.lisp +++ b/src/parse.lisp @@ -1,14 +1,22 @@ (in-package #:parse) +(defparameter line-number 0) + (esrap:defrule space (+ (or #\space #\tab)) (:constant nil)) (esrap:defrule newline - (+ #\newline)) + (+ #\newline) + (:destructure (n) + (declare (ignore n)) + (incf line-number) + nil)) ;;; defines rules to parse an integer in various bases +(defmacro define-number-rule ()) + (esrap:defrule binary (and #\0 #\B (+ (or "0" "1"))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 2))) @@ -19,7 +27,7 @@ (:lambda (list) (parse-integer (esrap:text list) :radix 10))) (esrap:defrule hex (and #\0 #\X (+ (or (esrap:character-ranges (#\0 #\9)) - "A" "B" "C" "D" "E" "F"))) + "A" "B" "C" "D" "E" "F"))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 16))) (esrap:defrule int (or binary octal hex decimal)) @@ -27,7 +35,8 @@ ;;; defines rules to parse an operand (esrap:defrule register (and #\$ (or int reg-id)) - (:lambda (list) (list 'rr (cadr list)))) + (:function cadr) + (:lambda (id) (list 'rr id))) (esrap:defrule dereference (and (esrap:? (or #\+ #\-)) int #\( register #\)) (:destructure (s i1 w1 r w2) @@ -43,22 +52,32 @@ (:lambda (list) (list 'l (esrap:text list)))) (esrap:defrule label-decl (and label #\:) - (:destructure (l w) - (declare (ignore w)) - l)) + (:function car) + (:lambda (l) + (util:add-label l line-number) + ;; this line isn't in the final program + (decf line-number) + nil)) ;;; defines rules to parse instruction types -(esrap:defrule r-type-1-m (or "ADDV" "SUBV" "MULV" "DIVV" "ADD" "SUB" "MUL" - "QUOT" "REM" "SFTR" "SFTL" "AND" "OR" "NOT" "XOR" )) -(esrap:defrule r-type-2-m "NOT") -(esrap:defrule r-type-3-m (or "CMP" "CEV")) -(esrap:defrule i-type-1-m (or "LOADV" "LOAD")) -(esrap:defrule i-type-2-m (or "STOREV" "STORE")) -(esrap:defrule i-type-3-m (or "ADDI" "SUBI" "SFTRI" "SFTLI" "ANDI" "ORI" "XORI")) -(esrap:defrule j-type-1-m (or "JMP" "JAL")) -(esrap:defrule j-type-2-m (or "JRL" "BEQ" "BGT" "BUF" "BOF")) -(esrap:defrule j-type-3-m (or "PUSH" "POP")) +(defun generate-mnemonic (name ops) + (let ((expr `(or ,@ops))) + (esrap:add-rule + name (make-instance 'esrap:rule :expression expr)))) + +;; define special cases first +(generate-mnemonic 'r-type-1-m '("NOT")) +(generate-mnemonic 'r-type-2-m '("CMP" "CEV")) +(generate-mnemonic 'i-type-1-m '("LOADV" "LOAD")) +(generate-mnemonic 'i-type-2-m '("STOREV" "STORE")) +(generate-mnemonic 'j-type-1-m '("JMP" "JAL")) +(generate-mnemonic 'j-type-2-m '("PUSH" "POP")) + +;; we need to reverse to ensure rules like "ADDV" are matched before "ADD" +(generate-mnemonic 'r-type-3-m (reverse util:r-type)) +(generate-mnemonic 'i-type-3-m (reverse util:i-type)) +(generate-mnemonic 'j-type-3-m (reverse util:j-type)) (defmacro defrule-instr (name type-id order &rest destructure-pattern) "Defines the boilerplate for a common esrap instruction rule. @@ -67,17 +86,19 @@ TYPE-ID is the symbol which appears as the first element of a successful parse. ORDER is the order to place the parsed tokens in the resulting list. DESTRUCTURE-PATTERN is the list of non-terminals on the right side of the grammar rule." (let* ((pattern-size (length destructure-pattern)) - (spaces (mapcar (lambda (x) (read-from-string (format nil "w~A" x))) (util:iota pattern-size))) - (vars (mapcar (lambda (x) (read-from-string (format nil "s~A" x))) (util:iota pattern-size)))) + (spaces (mapcar (lambda (x) (read-from-string (format nil "w~A" x))) (util:iota pattern-size))) + (vars (mapcar (lambda (x) (read-from-string (format nil "s~A" x))) (util:iota pattern-size)))) `(esrap:defrule ,name - (and ,(read-from-string (format nil "~A-m" name)) ,@(util:riffle (make-list pattern-size :initial-element 'space) destructure-pattern)) + (and ,(read-from-string (format nil "~A-m" name)) ,@(util:riffle (make-list pattern-size :initial-element 'space) destructure-pattern)) (:destructure (m ,@(util:riffle spaces vars)) - (declare (ignore ,@spaces)) - (list ,type-id m ,@(mapcar (lambda (x) (or (nth x vars) ''(rr 0))) order)))))) + (declare (ignore ,@spaces)) + (list ,type-id m ,@(mapcar (lambda (x) (or (nth x vars) ''(rr 0))) order)))))) -(defrule-instr r-type-1 'r (1 2 0) register register register) -(defrule-instr r-type-2 'r (1 2 0) register register) -(defrule-instr r-type-3 'r (0 1 2) register register) +(defrule-instr r-type-1 'r (1 2 0) register register) +(defrule-instr r-type-2 'r (0 1 2) register register) +(defrule-instr r-type-3 'r (1 2 0) register register register) +(defrule-instr i-type-3 'i (0 1 2) register register immediate) +(defrule-instr j-type-3 'j (1 0) label) (esrap:defrule i-type-1 (and i-type-1-m space register space dereference) (:destructure (m w1 s w2 di) @@ -89,30 +110,28 @@ DESTRUCTURE-PATTERN is the list of non-terminals on the right side of the gramma (declare (ignore w1 w2)) `(i ,m ,@(util:insert-in-middle di s)))) -(defrule-instr i-type-3 'i (0 1 2) register register immediate) (esrap:defrule j-type-1 (and j-type-1-m space dereference) (:destructure (m w di) (declare (ignore w)) `(j ,m ,@di))) -(defrule-instr j-type-2 'j (1 0) label) -(esrap:defrule j-type-3 (and j-type-3-m space register) +(esrap:defrule j-type-2 (and j-type-2-m space register) (:destructure (m w r) (declare (ignore w)) `(j ,m ,r (imm 0)))) (esrap:defrule instr (or r-type-1 r-type-2 r-type-3 i-type-1 i-type-2 - i-type-3 j-type-1 j-type-2 j-type-3 label-decl)) + i-type-3 j-type-1 j-type-2 j-type-3 label-decl)) ;;; defines rules to parse the .text segment (esrap:defrule instr-clean (and (esrap:? space) instr newline) - (:destructure (w1 i w2) - (declare (ignore nl l)) - i)) + (:function cadr)) (esrap:defrule text (and ".TEXT" newline - (* instr-clean)) - (:destructure (txt nl is) - (declare (ignore txt nl)) - (list 'text is))) + (* instr-clean)) + (:function caddr) + (:lambda (instr) + `(text ,@(remove nil instr)))) + +;;; defines rules to parse the .data segment diff --git a/src/util.lisp b/src/util.lisp index 93db659..d8bd227 100644 --- a/src/util.lisp +++ b/src/util.lisp @@ -4,7 +4,10 @@ "Returns t if FILE is extended with .asm, nil otherwise." (string= (pathname-type file) "asm")) -;; TODO this won't work for negative numbers of odd sizes quite yet. +(defun fits-in-X-bits (n) + "Returns the number of bits required to represent N" + (ceiling (/ (log (ceiling n (log 2))) (log 2)))) + (defun format-as-binary (num len) "Formats NUM as a binary number, and pads to LEN with zeros." (declare (type number num)) @@ -18,26 +21,68 @@ "Generates a number sequence from 0 to N." (when (> n 0) (do ((i 0 (1+ i)) - (item 0 (1+ item)) - (result nil (push item result))) - ((= i n) (nreverse result))))) + (item 0 (1+ item)) + (result nil (push item result))) + ((= i n) (nreverse result))))) (defun riffle (lst1 lst2) - "Given LST1 and LST2, returns a new list which is the an alternative sequence + "Given LST1 and LST2, returns a new list which is the an alternating sequence of the elements from both lists. Returns nil if the lists are not equal size." (when (eq (length lst1) (length lst2)) (loop for l1 in lst1 - for l2 in lst2 - append (list l1 l2)))) + for l2 in lst2 + append (list l1 l2)))) + +(defvar variable-table (make-hash-table :test #'equal)) +(defvar label-table (make-hash-table :test #'equal)) + +(defun add-variable (name value) + (if (gethash name variable-table) + (error "~@" name) + (setf (gethash name variable-table) value))) + +(defun add-label (name value) + (if (gethash name label-table) + (error "~@