diff options
author | bd <bdunahu@operationnull.com> | 2025-04-09 20:25:57 -0400 |
---|---|---|
committer | bd <bdunahu@operationnull.com> | 2025-04-09 20:25:57 -0400 |
commit | ce80845b5cb5b473327e4e561342490576d83a90 (patch) | |
tree | 90f7189982384c2e4de2f58a3f2daac4adf80514 | |
parent | 6ba1871c3825e17d33b96ffd3051239dfe18d61a (diff) |
Successfully parse every instruction type
-rw-r--r-- | input/add-loop.asm | 13 | ||||
-rw-r--r-- | rva.asd | 4 | ||||
-rw-r--r-- | src/lex.lisp | 110 | ||||
-rw-r--r-- | src/main.lisp | 21 | ||||
-rw-r--r-- | src/package.lisp | 12 | ||||
-rw-r--r-- | src/parse.lisp | 95 | ||||
-rw-r--r-- | t/lex.lisp | 104 | ||||
-rw-r--r-- | t/parse.lisp | 24 |
8 files changed, 81 insertions, 302 deletions
diff --git a/input/add-loop.asm b/input/add-loop.asm index 6379831..f8d97a1 100644 --- a/input/add-loop.asm +++ b/input/add-loop.asm @@ -1,17 +1,18 @@ - addi $fp $0 0x200 +.text + addi $2 $0 0x200 addi $5 $0 0x1 - store $5 0($fp) + store $5 0($2) addi $5 $0 0x2 - store $5 1($fp) + store $5 1($2) addi $5 $0 0x3 - store $5 2($fp) + store $5 2($2) addi $5 $0 0x4 - store $5 3($fp) + store $5 3($2) addi $5 $0 0x0 addi $6 $0 0x3 jrl CHECK LOOP: - add $9 $fp $5 + add $9 $2 $5 load $7 -0($9) load $8 +1($9) add $7 $7 $8 @@ -15,8 +15,7 @@ :serial t :components ((:file "package") (:file "util") - (:file "lex") - (:file "parse") + (:file "parse") (:file "main")))) :long-description #.(uiop:read-file-string @@ -37,7 +36,6 @@ :components ((:file "package") (:file "main") (:file "util") - (:file "lex") (:file "parse")))) :perform (test-op (o s) (uiop:symbol-call :rva-tests :test-rva))) diff --git a/src/lex.lisp b/src/lex.lisp deleted file mode 100644 index c86d17c..0000000 --- a/src/lex.lisp +++ /dev/null @@ -1,110 +0,0 @@ -(in-package #:lex) - -(define-condition lexer-error (error) - ((message :initarg :message - :initform nil - :reader message)) - (:report (lambda (condition stream) - (format stream "~A" (message condition)))) - (:documentation "Dedicated error for an invalid lex.")) - -(defun file->tokens (file) - "Opens FILE and parses returns a list of tokens, or -NIL if the file could not be opened." - - (defun read-instr (lst tokens-so-far) - "Collects tokens in FILE into TOKENS-SO-FAR, splitting on a newline." - (let ((token (read-token))) - (cond ((null token) (reverse tokens-so-far)) - ((eq token 'nl) - (cons (reverse tokens-so-far) (read-instr nil nil))) - (t (read-instr lst (cons token tokens-so-far)))))) - - (and (probe-file file) - (with-open-file (*standard-input* file :direction :input) - (remove nil (read-instr '() '()))))) - -(defun read-token () - "Reads *STANDARD-INPUT* and returns a token, or nil if the end -of file has been reached. -Whitespace, commas, colons, and parentheses are token delimiters. -Comments start with a semi-colon ';' and all tokens after are ignored." - (let ((chr (read-char *standard-input* nil))) - (cond - ((null chr) chr) - - ((char= chr #\linefeed) 'nl) - - ((whitespace-char-p chr) - (read-token)) - - ((char= chr #\;) - (progn (read-line *standard-input* nil) - 'nl)) - - ((char= chr #\.) 'period) - - ((char= chr #\() 'left-paren) - ((char= chr #\)) 'right-paren) - - ((char= chr #\:) 'colon) - ((char= chr #\$) 'dollar) - - ((char= chr #\+) 'plus) - ((char= chr #\-) 'minus) - - ((digit-char-p chr) - (read-immediate chr)) - - ((alpha-char-p chr) - (read-keyword chr)) - - (t (error 'lexer-error - :message - (format nil "LEX failled--~a is not a valid lexical symbol.~%" chr)))))) - -(defun read-immediate (chr) - "Reads a sequence of digits, in base 2, 8, 10, or 16.. Throws -`invalid-immediate-or-keyword' error if an alphabetic character is encountered." - ;; may be combined with read-keyword-helper - (defun read-immediate-helper (chrs-so-far) - (let ((chr (peek-char nil *standard-input* nil))) - (cond ((and (not (null chr)) (digit-char-p chr)) - (read-immediate-helper (cons (read-char *standard-input* nil) chrs-so-far))) - ((and (not (null chr)) (alpha-char-p chr)) - (error 'lexer-error - :message - (format nil "LEX failed--encountered ~a while reading immediate.~%" chr))) - (t (reverse chrs-so-far))))) - - (let* ((next (peek-char nil *standard-input* nil)) - (radix (cond ((null next) 10) - ((char= next #\b) 2) - ((char= next #\o) 8) - ((char= next #\x) 16) - ((alpha-char-p next) nil) - (t 10))) - (arg (list chr))) - (when (and (char= chr #\0) radix (not (= radix 10))) - (read-char *standard-input* nil) - (setq arg '())) - (parse-integer (coerce (read-immediate-helper arg) 'string) :radix radix))) - -(defun read-keyword (chr) - "Reads a sequence of alphabetic characters. Throws `invalid-immediate-or-keyword' -error if a digit is encountered." - ;; may be combined with read-immediate-helper - (defun read-keyword-helper (chrs-so-far) - (let ((chr (peek-char nil *standard-input* nil))) - (cond ((and (not (null chr)) (alpha-char-p chr)) - (read-keyword-helper (cons (read-char *standard-input* nil) chrs-so-far))) - ((and (not (null chr)) (digit-char-p chr)) - (error 'lexer-error - :message - (format nil "LEX failed--encountered ~a while reading keyword.~%" chr))) - (t (reverse chrs-so-far))))) - (coerce (read-keyword-helper (list chr)) 'string)) - -(defun whitespace-char-p (x) - (or (char= #\space x) - (not (graphic-char-p x)))) diff --git a/src/main.lisp b/src/main.lisp index 8a36a66..9180d57 100644 --- a/src/main.lisp +++ b/src/main.lisp @@ -19,13 +19,6 @@ _/_/ _/_/ " (list (clingon:make-option :flag - :description "run the lexer, but stop before parsing" - :long-name "lex" - :short-name #\l - :required nil - :key :lex) - (clingon:make-option - :flag :description "run the parser, but stop before emission" :long-name "parse" :short-name #\p @@ -37,24 +30,18 @@ _/_/ _/_/ " (print-splash) (let* ((args (clingon:command-arguments cmd)) (file (car args)) - (parse? (not (clingon:getopt cmd :lex))) - (emit? (not (clingon:getopt cmd :parse)))) + (emit? (not (clingon:getopt cmd :parse)))) (cond ;; complain about num arguments ((/= (length args) 1) (error "Wrong number of arguments.~%")) ((not (util:asm-extension? file)) (error "The file is not an asm source code file.~%")) - (t (let ((tokens (lex:file->tokens file))) - (if tokens - (progn (pprint tokens) + (t (let ((str (uiop:read-file-string file))) + (if str + (progn (pprint (esrap:parse 'parse:text (string-upcase str))) (terpri) (format t "---~%")) (error "The file does not exist, or it could not be opened.~%")) - (if parse? - (let ((tokens (parse:tokens->ast tokens))) - (progn (pprint tokens) - (terpri) - (format t "---~%")))) (format t "Nitimur in Vetitum~%")))))) diff --git a/src/package.lisp b/src/package.lisp index b856e38..2b01d15 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -10,18 +10,10 @@ #:iota #:riffle)) -(defpackage #:lex - (:use #:cl) - (:export #:lexer-error - #:file->tokens - ;; exported for testing only - #:read-token)) - (defpackage #:parse (:use #:cl) - (:export #:parser-error - #:tokens->ast - ;; exported for testing only + (:export ;; exported for testing only + #:text #:register #:instr )) diff --git a/src/parse.lisp b/src/parse.lisp index 54b0709..a92eae7 100644 --- a/src/parse.lisp +++ b/src/parse.lisp @@ -1,22 +1,24 @@ (in-package #:parse) (esrap:defrule space - (+ (or #\Space #\Tab)) + (+ (or #\space #\tab)) (:constant nil)) +(esrap:defrule newline + (+ #\newline)) + ;;; defines rules to parse an integer in various bases -(esrap:defrule binary (and #\0 #\b (+ (or "0" "1"))) +(esrap:defrule binary (and #\0 #\B (+ (or "0" "1"))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 2))) -(esrap:defrule octal (and #\0 #\o (+ (or (esrap:character-ranges (#\0 #\7))))) +(esrap:defrule octal (and #\0 #\O (+ (or (esrap:character-ranges (#\0 #\7))))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 8))) (esrap:defrule decimal (+ (or (esrap:character-ranges (#\0 #\9)))) (:lambda (list) (parse-integer (esrap:text list) :radix 10))) -(esrap:defrule hex (and #\0 #\x (+ (or (esrap:character-ranges (#\0 #\9)) - "a" "b" "c" "d" "e" "f" +(esrap:defrule hex (and #\0 #\X (+ (or (esrap:character-ranges (#\0 #\9)) "A" "B" "C" "D" "E" "F"))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 16))) @@ -30,7 +32,10 @@ (esrap:defrule dereference (and (esrap:? (or #\+ #\-)) int #\( register #\)) (:destructure (s i1 w1 r w2) (declare (ignore w1 w2)) - (list r (if (and s (string= s "-")) (- i1) i1)))) + (list r (list 'imm (if (and s (string= s "-")) (- i1) i1))))) + +(esrap:defrule immediate int + (:lambda (i) (list 'imm i))) ;;; defines rules to parse labels @@ -56,6 +61,11 @@ (esrap:defrule j-type-3-m (or "PUSH" "POP")) (defmacro defrule-instr (name type-id order &rest destructure-pattern) + "Defines the boilerplate for a common esrap instruction rule. +NAME is the name of the non-terminal symbol. +TYPE-ID is the symbol which appears as the first element of a successful parse. +ORDER is the order to place the parsed tokens in the resulting list. +DESTRUCTURE-PATTERN is the list of non-terminals on the right side of the grammar rule." (let* ((pattern-size (length destructure-pattern)) (spaces (mapcar (lambda (x) (read-from-string (format nil "w~A" x))) (util:iota pattern-size))) (vars (mapcar (lambda (x) (read-from-string (format nil "s~A" x))) (util:iota pattern-size)))) @@ -68,40 +78,41 @@ (defrule-instr r-type-1 'r (1 2 0) register register register) (defrule-instr r-type-2 'r (1 2 0) register register) (defrule-instr r-type-3 'r (0 1 2) register register) -;;(defrule-instr j-type-1 'j (0 1) dereference) + +(esrap:defrule i-type-1 (and i-type-1-m space register space dereference) + (:destructure (m w1 s w2 di) + (declare (ignore w1 w2)) + `(i ,m ,s ,@di))) + +(esrap:defrule i-type-2 (and i-type-2-m space register space dereference) + (:destructure (m w1 s w2 di) + (declare (ignore w1 w2)) + `(i ,m ,@(util:insert-in-middle di s)))) + +(defrule-instr i-type-3 'i (0 1 2) register register immediate) +(esrap:defrule j-type-1 (and j-type-1-m space dereference) + (:destructure (m w di) + (declare (ignore w)) + `(j ,m ,@di))) + (defrule-instr j-type-2 'j (1 0) label) -(defrule-instr j-type-3 'j (0 1) register) - -;; (esrap:defrule i-type-1 (and i-type-1-m space register space dereference) -;; (:destructure (m w1 s w2 di) -;; (declare (ignore w1 w2)) -;; `(i ,m ,s ,@di))) -;; (esrap:defrule i-type-2 (and i-type-2-m space register space dereference) -;; (:destructure (m w1 s w2 di) -;; (declare (ignore w1 w2)) -;; `(i ,m ,@(util:insert-in-middle di s)))) -;; (esrap:defrule i-type-3 (and i-type-3-m space register space register space int) -;; (:destructure (m w1 s1 w2 s2 w3 i) -;; (declare (ignore w1 w2 w3)) -;; (list i m s1 s2 i))) -;; (esrap:defrule j-type-1 (and j-type-1-m space dereference) -;; (:destructure (m w di) -;; (declare (ignore w)) -;; `(j ,m ,@di))) -;; (esrap:defrule j-type-2 (and j-type-2-m space label) -;; (:destructure (m w label) -;; (declare (ignore w)) -;; (list j m "00000" label))) -;; (esrap:defrule j-type-3 (and j-type-3-m space register) -;; (:destructure (m w r) -;; (declare (ignore w)) -;; (list j m r "00000"))) - -(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 j-type-2 j-type-3)) - - -;; (esrap:parse 'register "$3") - -;; (esrap:parse 'j-type-1 "JMP 3($3)") -;; (esrap:parse 'j-type-2 "JRL FOO") -;; (esrap:parse 'j-type-3 "PUSH $1") +(esrap:defrule j-type-3 (and j-type-3-m space register) + (:destructure (m w r) + (declare (ignore w)) + `(j ,m ,r (imm 0)))) + +(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 i-type-1 i-type-2 + i-type-3 j-type-1 j-type-2 j-type-3 label-decl)) + +;;; defines rules to parse the .text segment + +(esrap:defrule instr-clean (and (esrap:? space) instr newline) + (:destructure (w1 i w2) + (declare (ignore nl l)) + i)) + +(esrap:defrule text (and ".TEXT" newline + (* instr-clean)) + (:destructure (txt nl is) + (declare (ignore txt nl)) + (list 'text is))) diff --git a/t/lex.lisp b/t/lex.lisp deleted file mode 100644 index 98d7c28..0000000 --- a/t/lex.lisp +++ /dev/null @@ -1,104 +0,0 @@ -(in-package #:rva-tests) - -(defmacro read-this (str &body body) - `(let ((*standard-input* (make-string-input-stream ,str))) - ,@body)) - -(def-suite lex-tests - :description "Test functions exported from the lexer." - :in all-tests) - -(in-suite lex-tests) - -(test read-token-reads-eof - (read-this "" - (is (not (lex:read-token))))) - -(test read-token-reads-nl - (read-this " -" - (is (eq (lex:read-token) 'lex::nl)))) - -(test read-token-reads-left-paren - (read-this "." - (is (eq (lex:read-token) 'lex::period)))) - -(test read-token-reads-left-paren - (read-this "(" - (is (eq (lex:read-token) 'lex::left-paren)))) - -(test read-token-reads-right-paren - (read-this ")" - (is (eq (lex:read-token) 'lex::right-paren)))) - -(test read-token-reads-left-paren - (read-this "$" - (is (eq (lex:read-token) 'lex::dollar)))) - -(test read-token-reads-plus - (read-this "+" - (is (eq (lex:read-token) 'lex::plus)))) - -(test read-token-reads-minus - (read-this "-" - (is (eq (lex:read-token) 'lex::minus)))) - -(test read-token-ignores-space - (read-this " (" - (is (eq (lex:read-token) 'lex::left-paren)))) - -(test read-token-ignores-tab - (read-this " (" - (is (eq (lex:read-token) 'lex::left-paren)))) - -(test read-token-ignores-comment - (read-this "; this is a comment -(" - (is (eq (lex:read-token) 'lex::nl)))) - -(test read-token-immediate-zero - (read-this "0" - (is (= (lex:read-token) 0)))) - -(test read-token-immediate-all-digits - (read-this "123456789" - (is (= (lex:read-token) 123456789)))) - -(test read-token-immediate-binary - (read-this "0b00101010" - (is (= (lex:read-token) 42)))) - -(test read-token-immediate-octal - (read-this "0o052" - (is (= (lex:read-token) 42)))) - -(test read-token-immediate-hexadecimal - (read-this "0x200" - (is (= (lex:read-token) 512)))) - -(test read-token-immediate-invalid-immediate - (handler-case - (progn (read-this "0v0" (lex:read-token)) - (fail)) - (lex:lexer-error ()))) - -;; do we want a custom error for this too? -(test read-token-immediate-radix - (handler-case - (progn (read-this "0x" (lex:read-token)) - (fail)) - (sb-int:simple-parse-error ()))) - -(test read-token-keyword-single - (read-this "a" - (is (string= (lex:read-token) "a")))) - -(test read-token-keyword-add - (read-this "addi" - (is (string= (lex:read-token) "addi")))) - -(test read-token-immediate-invalid-keyword - (handler-case - (progn (read-this "sub0" (lex:read-token)) - (fail)) - (lex:lexer-error ()))) diff --git a/t/parse.lisp b/t/parse.lisp index 77db380..a9de6db 100644 --- a/t/parse.lisp +++ b/t/parse.lisp @@ -37,22 +37,26 @@ (is (equal '(parse::r "CMP" (parse::rr 1) (parse::rr 5) (parse::rr 0)) (esrap:parse 'parse:instr "CMP $1 $5")))) -;; (test esrap-i-type-1 -;; (is (equal (list 'parse::i "LOAD" (list 'parse::rr 8) (list 'parse::rr 9) (list 'parse::r 1)) -;; (esrap:parse 'parse:instr "LOAD $8 1($9)")))) +(test esrap-i-type-1 + (is (equal '(parse::i "LOAD" (parse::rr 8) (parse::rr 9) (parse::imm 1)) + (esrap:parse 'parse:instr "LOAD $8 1($9)")))) -;; (test esrap-i-type-2 -;; (is (equal (list 'parse::i "STORE" (list 'parse::rr 3) (list 'parse::rr 5) (list 'parse::rr 3)) -;; (esrap:parse 'parse:instr "STORE $5 3($3)")))) +(test esrap-i-type-2 + (is (equal '(parse::i "STORE" (parse::rr 3) (parse::rr 5) (parse::imm 3)) + (esrap:parse 'parse:instr "STORE $5 3($3)")))) -;; (test esrap-i-type-3 -;; (is (equal (list 'parse::i "ORI" (list 'parse::rr 5) (list 'parse::rr 4) (list 'parse::r 2)) -;; (esrap:parse 'parse:instr "ORI $5 $4 2")))) +(test esrap-i-type-3 + (is (equal '(parse::i "ORI" (parse::rr 5) (parse::rr 4) (parse::imm 2)) + (esrap:parse 'parse:instr "ORI $5 $4 2")))) + +(test esrap-j-type-1 + (is (equal '(parse::j "JMP" (parse::rr 3) (parse::imm 3)) + (esrap:parse 'parse:instr "JMP 3($3)")))) (test esrap-j-type-2 (is (equal '(parse::j "JRL" (parse::rr 0) (parse::l "FOO")) (esrap:parse 'parse:instr "JRL FOO")))) (test esrap-j-type-3 - (is (equal '(parse::j "PUSH" (parse::rr 1) 0) + (is (equal '(parse::j "PUSH" (parse::rr 1) (parse::imm 0)) (esrap:parse 'parse:instr "PUSH $1")))) |