summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbd <bdunahu@operationnull.com>2025-04-09 20:25:57 -0400
committerbd <bdunahu@operationnull.com>2025-04-09 20:25:57 -0400
commitce80845b5cb5b473327e4e561342490576d83a90 (patch)
tree90f7189982384c2e4de2f58a3f2daac4adf80514
parent6ba1871c3825e17d33b96ffd3051239dfe18d61a (diff)
Successfully parse every instruction type
-rw-r--r--input/add-loop.asm13
-rw-r--r--rva.asd4
-rw-r--r--src/lex.lisp110
-rw-r--r--src/main.lisp21
-rw-r--r--src/package.lisp12
-rw-r--r--src/parse.lisp95
-rw-r--r--t/lex.lisp104
-rw-r--r--t/parse.lisp24
8 files changed, 81 insertions, 302 deletions
diff --git a/input/add-loop.asm b/input/add-loop.asm
index 6379831..f8d97a1 100644
--- a/input/add-loop.asm
+++ b/input/add-loop.asm
@@ -1,17 +1,18 @@
- addi $fp $0 0x200
+.text
+ addi $2 $0 0x200
addi $5 $0 0x1
- store $5 0($fp)
+ store $5 0($2)
addi $5 $0 0x2
- store $5 1($fp)
+ store $5 1($2)
addi $5 $0 0x3
- store $5 2($fp)
+ store $5 2($2)
addi $5 $0 0x4
- store $5 3($fp)
+ store $5 3($2)
addi $5 $0 0x0
addi $6 $0 0x3
jrl CHECK
LOOP:
- add $9 $fp $5
+ add $9 $2 $5
load $7 -0($9)
load $8 +1($9)
add $7 $7 $8
diff --git a/rva.asd b/rva.asd
index dd904fb..9271472 100644
--- a/rva.asd
+++ b/rva.asd
@@ -15,8 +15,7 @@
:serial t
:components ((:file "package")
(:file "util")
- (:file "lex")
- (:file "parse")
+ (:file "parse")
(:file "main"))))
:long-description
#.(uiop:read-file-string
@@ -37,7 +36,6 @@
:components ((:file "package")
(:file "main")
(:file "util")
- (:file "lex")
(:file "parse"))))
:perform (test-op (o s) (uiop:symbol-call :rva-tests :test-rva)))
diff --git a/src/lex.lisp b/src/lex.lisp
deleted file mode 100644
index c86d17c..0000000
--- a/src/lex.lisp
+++ /dev/null
@@ -1,110 +0,0 @@
-(in-package #:lex)
-
-(define-condition lexer-error (error)
- ((message :initarg :message
- :initform nil
- :reader message))
- (:report (lambda (condition stream)
- (format stream "~A" (message condition))))
- (:documentation "Dedicated error for an invalid lex."))
-
-(defun file->tokens (file)
- "Opens FILE and parses returns a list of tokens, or
-NIL if the file could not be opened."
-
- (defun read-instr (lst tokens-so-far)
- "Collects tokens in FILE into TOKENS-SO-FAR, splitting on a newline."
- (let ((token (read-token)))
- (cond ((null token) (reverse tokens-so-far))
- ((eq token 'nl)
- (cons (reverse tokens-so-far) (read-instr nil nil)))
- (t (read-instr lst (cons token tokens-so-far))))))
-
- (and (probe-file file)
- (with-open-file (*standard-input* file :direction :input)
- (remove nil (read-instr '() '())))))
-
-(defun read-token ()
- "Reads *STANDARD-INPUT* and returns a token, or nil if the end
-of file has been reached.
-Whitespace, commas, colons, and parentheses are token delimiters.
-Comments start with a semi-colon ';' and all tokens after are ignored."
- (let ((chr (read-char *standard-input* nil)))
- (cond
- ((null chr) chr)
-
- ((char= chr #\linefeed) 'nl)
-
- ((whitespace-char-p chr)
- (read-token))
-
- ((char= chr #\;)
- (progn (read-line *standard-input* nil)
- 'nl))
-
- ((char= chr #\.) 'period)
-
- ((char= chr #\() 'left-paren)
- ((char= chr #\)) 'right-paren)
-
- ((char= chr #\:) 'colon)
- ((char= chr #\$) 'dollar)
-
- ((char= chr #\+) 'plus)
- ((char= chr #\-) 'minus)
-
- ((digit-char-p chr)
- (read-immediate chr))
-
- ((alpha-char-p chr)
- (read-keyword chr))
-
- (t (error 'lexer-error
- :message
- (format nil "LEX failled--~a is not a valid lexical symbol.~%" chr))))))
-
-(defun read-immediate (chr)
- "Reads a sequence of digits, in base 2, 8, 10, or 16.. Throws
-`invalid-immediate-or-keyword' error if an alphabetic character is encountered."
- ;; may be combined with read-keyword-helper
- (defun read-immediate-helper (chrs-so-far)
- (let ((chr (peek-char nil *standard-input* nil)))
- (cond ((and (not (null chr)) (digit-char-p chr))
- (read-immediate-helper (cons (read-char *standard-input* nil) chrs-so-far)))
- ((and (not (null chr)) (alpha-char-p chr))
- (error 'lexer-error
- :message
- (format nil "LEX failed--encountered ~a while reading immediate.~%" chr)))
- (t (reverse chrs-so-far)))))
-
- (let* ((next (peek-char nil *standard-input* nil))
- (radix (cond ((null next) 10)
- ((char= next #\b) 2)
- ((char= next #\o) 8)
- ((char= next #\x) 16)
- ((alpha-char-p next) nil)
- (t 10)))
- (arg (list chr)))
- (when (and (char= chr #\0) radix (not (= radix 10)))
- (read-char *standard-input* nil)
- (setq arg '()))
- (parse-integer (coerce (read-immediate-helper arg) 'string) :radix radix)))
-
-(defun read-keyword (chr)
- "Reads a sequence of alphabetic characters. Throws `invalid-immediate-or-keyword'
-error if a digit is encountered."
- ;; may be combined with read-immediate-helper
- (defun read-keyword-helper (chrs-so-far)
- (let ((chr (peek-char nil *standard-input* nil)))
- (cond ((and (not (null chr)) (alpha-char-p chr))
- (read-keyword-helper (cons (read-char *standard-input* nil) chrs-so-far)))
- ((and (not (null chr)) (digit-char-p chr))
- (error 'lexer-error
- :message
- (format nil "LEX failed--encountered ~a while reading keyword.~%" chr)))
- (t (reverse chrs-so-far)))))
- (coerce (read-keyword-helper (list chr)) 'string))
-
-(defun whitespace-char-p (x)
- (or (char= #\space x)
- (not (graphic-char-p x))))
diff --git a/src/main.lisp b/src/main.lisp
index 8a36a66..9180d57 100644
--- a/src/main.lisp
+++ b/src/main.lisp
@@ -19,13 +19,6 @@ _/_/ _/_/ "
(list
(clingon:make-option
:flag
- :description "run the lexer, but stop before parsing"
- :long-name "lex"
- :short-name #\l
- :required nil
- :key :lex)
- (clingon:make-option
- :flag
:description "run the parser, but stop before emission"
:long-name "parse"
:short-name #\p
@@ -37,24 +30,18 @@ _/_/ _/_/ "
(print-splash)
(let* ((args (clingon:command-arguments cmd))
(file (car args))
- (parse? (not (clingon:getopt cmd :lex)))
- (emit? (not (clingon:getopt cmd :parse))))
+ (emit? (not (clingon:getopt cmd :parse))))
(cond
;; complain about num arguments
((/= (length args) 1) (error "Wrong number of arguments.~%"))
((not (util:asm-extension? file))
(error "The file is not an asm source code file.~%"))
- (t (let ((tokens (lex:file->tokens file)))
- (if tokens
- (progn (pprint tokens)
+ (t (let ((str (uiop:read-file-string file)))
+ (if str
+ (progn (pprint (esrap:parse 'parse:text (string-upcase str)))
(terpri)
(format t "---~%"))
(error "The file does not exist, or it could not be opened.~%"))
- (if parse?
- (let ((tokens (parse:tokens->ast tokens)))
- (progn (pprint tokens)
- (terpri)
- (format t "---~%"))))
(format t "Nitimur in Vetitum~%"))))))
diff --git a/src/package.lisp b/src/package.lisp
index b856e38..2b01d15 100644
--- a/src/package.lisp
+++ b/src/package.lisp
@@ -10,18 +10,10 @@
#:iota
#:riffle))
-(defpackage #:lex
- (:use #:cl)
- (:export #:lexer-error
- #:file->tokens
- ;; exported for testing only
- #:read-token))
-
(defpackage #:parse
(:use #:cl)
- (:export #:parser-error
- #:tokens->ast
- ;; exported for testing only
+ (:export ;; exported for testing only
+ #:text
#:register
#:instr
))
diff --git a/src/parse.lisp b/src/parse.lisp
index 54b0709..a92eae7 100644
--- a/src/parse.lisp
+++ b/src/parse.lisp
@@ -1,22 +1,24 @@
(in-package #:parse)
(esrap:defrule space
- (+ (or #\Space #\Tab))
+ (+ (or #\space #\tab))
(:constant nil))
+(esrap:defrule newline
+ (+ #\newline))
+
;;; defines rules to parse an integer in various bases
-(esrap:defrule binary (and #\0 #\b (+ (or "0" "1")))
+(esrap:defrule binary (and #\0 #\B (+ (or "0" "1")))
(:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 2)))
-(esrap:defrule octal (and #\0 #\o (+ (or (esrap:character-ranges (#\0 #\7)))))
+(esrap:defrule octal (and #\0 #\O (+ (or (esrap:character-ranges (#\0 #\7)))))
(:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 8)))
(esrap:defrule decimal (+ (or (esrap:character-ranges (#\0 #\9))))
(:lambda (list) (parse-integer (esrap:text list) :radix 10)))
-(esrap:defrule hex (and #\0 #\x (+ (or (esrap:character-ranges (#\0 #\9))
- "a" "b" "c" "d" "e" "f"
+(esrap:defrule hex (and #\0 #\X (+ (or (esrap:character-ranges (#\0 #\9))
"A" "B" "C" "D" "E" "F")))
(:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 16)))
@@ -30,7 +32,10 @@
(esrap:defrule dereference (and (esrap:? (or #\+ #\-)) int #\( register #\))
(:destructure (s i1 w1 r w2)
(declare (ignore w1 w2))
- (list r (if (and s (string= s "-")) (- i1) i1))))
+ (list r (list 'imm (if (and s (string= s "-")) (- i1) i1)))))
+
+(esrap:defrule immediate int
+ (:lambda (i) (list 'imm i)))
;;; defines rules to parse labels
@@ -56,6 +61,11 @@
(esrap:defrule j-type-3-m (or "PUSH" "POP"))
(defmacro defrule-instr (name type-id order &rest destructure-pattern)
+ "Defines the boilerplate for a common esrap instruction rule.
+NAME is the name of the non-terminal symbol.
+TYPE-ID is the symbol which appears as the first element of a successful parse.
+ORDER is the order to place the parsed tokens in the resulting list.
+DESTRUCTURE-PATTERN is the list of non-terminals on the right side of the grammar rule."
(let* ((pattern-size (length destructure-pattern))
(spaces (mapcar (lambda (x) (read-from-string (format nil "w~A" x))) (util:iota pattern-size)))
(vars (mapcar (lambda (x) (read-from-string (format nil "s~A" x))) (util:iota pattern-size))))
@@ -68,40 +78,41 @@
(defrule-instr r-type-1 'r (1 2 0) register register register)
(defrule-instr r-type-2 'r (1 2 0) register register)
(defrule-instr r-type-3 'r (0 1 2) register register)
-;;(defrule-instr j-type-1 'j (0 1) dereference)
+
+(esrap:defrule i-type-1 (and i-type-1-m space register space dereference)
+ (:destructure (m w1 s w2 di)
+ (declare (ignore w1 w2))
+ `(i ,m ,s ,@di)))
+
+(esrap:defrule i-type-2 (and i-type-2-m space register space dereference)
+ (:destructure (m w1 s w2 di)
+ (declare (ignore w1 w2))
+ `(i ,m ,@(util:insert-in-middle di s))))
+
+(defrule-instr i-type-3 'i (0 1 2) register register immediate)
+(esrap:defrule j-type-1 (and j-type-1-m space dereference)
+ (:destructure (m w di)
+ (declare (ignore w))
+ `(j ,m ,@di)))
+
(defrule-instr j-type-2 'j (1 0) label)
-(defrule-instr j-type-3 'j (0 1) register)
-
-;; (esrap:defrule i-type-1 (and i-type-1-m space register space dereference)
-;; (:destructure (m w1 s w2 di)
-;; (declare (ignore w1 w2))
-;; `(i ,m ,s ,@di)))
-;; (esrap:defrule i-type-2 (and i-type-2-m space register space dereference)
-;; (:destructure (m w1 s w2 di)
-;; (declare (ignore w1 w2))
-;; `(i ,m ,@(util:insert-in-middle di s))))
-;; (esrap:defrule i-type-3 (and i-type-3-m space register space register space int)
-;; (:destructure (m w1 s1 w2 s2 w3 i)
-;; (declare (ignore w1 w2 w3))
-;; (list i m s1 s2 i)))
-;; (esrap:defrule j-type-1 (and j-type-1-m space dereference)
-;; (:destructure (m w di)
-;; (declare (ignore w))
-;; `(j ,m ,@di)))
-;; (esrap:defrule j-type-2 (and j-type-2-m space label)
-;; (:destructure (m w label)
-;; (declare (ignore w))
-;; (list j m "00000" label)))
-;; (esrap:defrule j-type-3 (and j-type-3-m space register)
-;; (:destructure (m w r)
-;; (declare (ignore w))
-;; (list j m r "00000")))
-
-(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 j-type-2 j-type-3))
-
-
-;; (esrap:parse 'register "$3")
-
-;; (esrap:parse 'j-type-1 "JMP 3($3)")
-;; (esrap:parse 'j-type-2 "JRL FOO")
-;; (esrap:parse 'j-type-3 "PUSH $1")
+(esrap:defrule j-type-3 (and j-type-3-m space register)
+ (:destructure (m w r)
+ (declare (ignore w))
+ `(j ,m ,r (imm 0))))
+
+(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 i-type-1 i-type-2
+ i-type-3 j-type-1 j-type-2 j-type-3 label-decl))
+
+;;; defines rules to parse the .text segment
+
+(esrap:defrule instr-clean (and (esrap:? space) instr newline)
+ (:destructure (w1 i w2)
+ (declare (ignore nl l))
+ i))
+
+(esrap:defrule text (and ".TEXT" newline
+ (* instr-clean))
+ (:destructure (txt nl is)
+ (declare (ignore txt nl))
+ (list 'text is)))
diff --git a/t/lex.lisp b/t/lex.lisp
deleted file mode 100644
index 98d7c28..0000000
--- a/t/lex.lisp
+++ /dev/null
@@ -1,104 +0,0 @@
-(in-package #:rva-tests)
-
-(defmacro read-this (str &body body)
- `(let ((*standard-input* (make-string-input-stream ,str)))
- ,@body))
-
-(def-suite lex-tests
- :description "Test functions exported from the lexer."
- :in all-tests)
-
-(in-suite lex-tests)
-
-(test read-token-reads-eof
- (read-this ""
- (is (not (lex:read-token)))))
-
-(test read-token-reads-nl
- (read-this "
-"
- (is (eq (lex:read-token) 'lex::nl))))
-
-(test read-token-reads-left-paren
- (read-this "."
- (is (eq (lex:read-token) 'lex::period))))
-
-(test read-token-reads-left-paren
- (read-this "("
- (is (eq (lex:read-token) 'lex::left-paren))))
-
-(test read-token-reads-right-paren
- (read-this ")"
- (is (eq (lex:read-token) 'lex::right-paren))))
-
-(test read-token-reads-left-paren
- (read-this "$"
- (is (eq (lex:read-token) 'lex::dollar))))
-
-(test read-token-reads-plus
- (read-this "+"
- (is (eq (lex:read-token) 'lex::plus))))
-
-(test read-token-reads-minus
- (read-this "-"
- (is (eq (lex:read-token) 'lex::minus))))
-
-(test read-token-ignores-space
- (read-this " ("
- (is (eq (lex:read-token) 'lex::left-paren))))
-
-(test read-token-ignores-tab
- (read-this " ("
- (is (eq (lex:read-token) 'lex::left-paren))))
-
-(test read-token-ignores-comment
- (read-this "; this is a comment
-("
- (is (eq (lex:read-token) 'lex::nl))))
-
-(test read-token-immediate-zero
- (read-this "0"
- (is (= (lex:read-token) 0))))
-
-(test read-token-immediate-all-digits
- (read-this "123456789"
- (is (= (lex:read-token) 123456789))))
-
-(test read-token-immediate-binary
- (read-this "0b00101010"
- (is (= (lex:read-token) 42))))
-
-(test read-token-immediate-octal
- (read-this "0o052"
- (is (= (lex:read-token) 42))))
-
-(test read-token-immediate-hexadecimal
- (read-this "0x200"
- (is (= (lex:read-token) 512))))
-
-(test read-token-immediate-invalid-immediate
- (handler-case
- (progn (read-this "0v0" (lex:read-token))
- (fail))
- (lex:lexer-error ())))
-
-;; do we want a custom error for this too?
-(test read-token-immediate-radix
- (handler-case
- (progn (read-this "0x" (lex:read-token))
- (fail))
- (sb-int:simple-parse-error ())))
-
-(test read-token-keyword-single
- (read-this "a"
- (is (string= (lex:read-token) "a"))))
-
-(test read-token-keyword-add
- (read-this "addi"
- (is (string= (lex:read-token) "addi"))))
-
-(test read-token-immediate-invalid-keyword
- (handler-case
- (progn (read-this "sub0" (lex:read-token))
- (fail))
- (lex:lexer-error ())))
diff --git a/t/parse.lisp b/t/parse.lisp
index 77db380..a9de6db 100644
--- a/t/parse.lisp
+++ b/t/parse.lisp
@@ -37,22 +37,26 @@
(is (equal '(parse::r "CMP" (parse::rr 1) (parse::rr 5) (parse::rr 0))
(esrap:parse 'parse:instr "CMP $1 $5"))))
-;; (test esrap-i-type-1
-;; (is (equal (list 'parse::i "LOAD" (list 'parse::rr 8) (list 'parse::rr 9) (list 'parse::r 1))
-;; (esrap:parse 'parse:instr "LOAD $8 1($9)"))))
+(test esrap-i-type-1
+ (is (equal '(parse::i "LOAD" (parse::rr 8) (parse::rr 9) (parse::imm 1))
+ (esrap:parse 'parse:instr "LOAD $8 1($9)"))))
-;; (test esrap-i-type-2
-;; (is (equal (list 'parse::i "STORE" (list 'parse::rr 3) (list 'parse::rr 5) (list 'parse::rr 3))
-;; (esrap:parse 'parse:instr "STORE $5 3($3)"))))
+(test esrap-i-type-2
+ (is (equal '(parse::i "STORE" (parse::rr 3) (parse::rr 5) (parse::imm 3))
+ (esrap:parse 'parse:instr "STORE $5 3($3)"))))
-;; (test esrap-i-type-3
-;; (is (equal (list 'parse::i "ORI" (list 'parse::rr 5) (list 'parse::rr 4) (list 'parse::r 2))
-;; (esrap:parse 'parse:instr "ORI $5 $4 2"))))
+(test esrap-i-type-3
+ (is (equal '(parse::i "ORI" (parse::rr 5) (parse::rr 4) (parse::imm 2))
+ (esrap:parse 'parse:instr "ORI $5 $4 2"))))
+
+(test esrap-j-type-1
+ (is (equal '(parse::j "JMP" (parse::rr 3) (parse::imm 3))
+ (esrap:parse 'parse:instr "JMP 3($3)"))))
(test esrap-j-type-2
(is (equal '(parse::j "JRL" (parse::rr 0) (parse::l "FOO"))
(esrap:parse 'parse:instr "JRL FOO"))))
(test esrap-j-type-3
- (is (equal '(parse::j "PUSH" (parse::rr 1) 0)
+ (is (equal '(parse::j "PUSH" (parse::rr 1) (parse::imm 0))
(esrap:parse 'parse:instr "PUSH $1"))))