summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbd <bdunahu@operationnull.com>2025-04-09 19:26:51 -0400
committerbd <bdunahu@operationnull.com>2025-04-09 19:26:51 -0400
commit6ba1871c3825e17d33b96ffd3051239dfe18d61a (patch)
tree5cfca6ae1a6885dfde5d3ff003467d6b3c40fc44
parent4df084d3e2785412eb086fb02ac10be5def695d1 (diff)
Saving first part of large rewrite
-rw-r--r--rva.asd3
-rw-r--r--src/lex.lisp2
-rw-r--r--src/package.lisp15
-rw-r--r--src/parse.lisp204
-rw-r--r--src/util.lisp19
-rw-r--r--t/lex.lisp4
-rw-r--r--t/parse.lisp96
7 files changed, 178 insertions, 165 deletions
diff --git a/rva.asd b/rva.asd
index 83dc9fe..dd904fb 100644
--- a/rva.asd
+++ b/rva.asd
@@ -9,7 +9,8 @@
:description "Assembler for the RISC-V[ECTOR] mini-ISA."
:source-control (:git "git@github.com:bdunahu/rva.git")
:depends-on (:uiop
- :clingon)
+ :clingon
+ :esrap)
:components ((:module "src"
:serial t
:components ((:file "package")
diff --git a/src/lex.lisp b/src/lex.lisp
index 5b1457d..c86d17c 100644
--- a/src/lex.lisp
+++ b/src/lex.lisp
@@ -42,6 +42,8 @@ Comments start with a semi-colon ';' and all tokens after are ignored."
(progn (read-line *standard-input* nil)
'nl))
+ ((char= chr #\.) 'period)
+
((char= chr #\() 'left-paren)
((char= chr #\)) 'right-paren)
diff --git a/src/package.lisp b/src/package.lisp
index 7a14bd2..b856e38 100644
--- a/src/package.lisp
+++ b/src/package.lisp
@@ -6,10 +6,9 @@
(:use #:cl)
(:export #:asm-extension?
#:format-as-binary
- #:type-r
- #:type-i
- #:type-j
- #:label-loc))
+ #:insert-in-middle
+ #:iota
+ #:riffle))
(defpackage #:lex
(:use #:cl)
@@ -22,7 +21,7 @@
(:use #:cl)
(:export #:parser-error
#:tokens->ast
- #:to-register
- ;; exported for testing only
- #:extract-label
- #:extract-r-type))
+ ;; exported for testing only
+ #:register
+ #:instr
+ ))
diff --git a/src/parse.lisp b/src/parse.lisp
index 37415c8..54b0709 100644
--- a/src/parse.lisp
+++ b/src/parse.lisp
@@ -1,101 +1,107 @@
(in-package #:parse)
-(defparameter reg-loc
- '(("zr" . 0) ("lk" . 1)
- ("sp" . 2) ("cc" . 3)
- ("vl" . 4) ("fp" . 5))
- "A symbol table mapping register aliases to identifiers. If you want to add
-a new alias, do it here.")
-
-(defparameter label-loc '()
- "A symbol table mapping label names to line indices.")
-
-(define-condition parser-error (error)
- ((message :initarg :message
- :initform nil
- :reader message))
- (:report (lambda (condition stream)
- (format stream "~A" (message condition))))
- (:documentation "Dedicated error for an invalid parse."))
-
-(defun to-register (tokens)
- "Attempts to parse a register from the start of TOKENS. If it is badly formed,
-throws a PARSER-ERROR."
- (or (and (equal (car tokens) 'LEX::DOLLAR)
- (cadr tokens)
- (let ((r (cadr tokens)))
- (cond ((stringp r) (cdr (assoc r reg-loc :test #'string=)))
- ((numberp r) (and (<= 0 r 23) r)))))
- (error 'parser-error
- :message
- (format nil "PARSE failed--Expected register, got ~a.~%"
- (subseq tokens 0 (min 2 (length tokens)))))))
-
-(defun tokens->ast (program)
- "Given PROGRAM, which is a list of lists of symbols,
-filters out the labels and parses."
- ;; TODO add directives
- (let ((program (remove nil (mapcar #'extract-label program)))
- (i 0))
- (mapcar (lambda (l) (extract-instruction l i)) program)))
-
-(let ((i 0))
- (defun extract-label (line)
- "Given a series of tokens LINE, determines if LINE is
-in the form STRING {colon}. If it is, then it is treated as a
-label, and pushed onto the stack with the line index.
-
-Note that this function is intended to be called using mapcar,
-so that labels can be added to a map and otherwise removed from
-processing."
- (if (and (equal 2 (length line))
- (stringp (car line))
- (equal 'lex::colon (cadr line)))
- (progn (push (cons (read-from-string (car line)) i) label-loc) nil)
- (progn (incf i) line))))
-
-(defun extract-instruction (line i)
- "Given instruction LINE, determines the expected type format and passes
-LINE and the index I to the the respective function."
- (let* ((mnemonic (intern (string-upcase (car line)) :util))
- ;; TODO add pseudo-ops (i.e., nop, leave, ret...)
- ;; should probably be their own extract function
- (type-fn (cond
- ((member mnemonic util:type-r) #'extract-r-type)
- ((member mnemonic util:type-i) #'extract-i-type)
- ((member mnemonic util:type-j) #'extract-j-type))))
- (if type-fn
- (funcall type-fn line i)
- (error 'parser-error
- :message
- (format nil "PARSE failed--~a is not a known keyword.~%" mnemonic)))))
-
-(defun extract-r-type (line i)
- (let ((mnemonic (intern (string-upcase (car line)) :util)))
- (defun die ()
- (error 'parser-error
- :message
- (format nil "PARSE failed---Incorrect number of operands for ~a" mnemonic)))
- (defun eat-registers (registers-so-far lst)
- (if (not (null lst))
- (eat-registers (cons (to-register lst) registers-so-far)
- (cddr lst))
- (reverse registers-so-far)))
- (let* ((registers (eat-registers '() (cdr line)))
- ;; handle special cases
- (registers (cond ((member mnemonic '(util::CMP util::CEV))
- (if (= 2 (length registers))
- (cons 0 registers)
- (die)))
- ((member mnemonic '(util::NOT))
- (if (= 2 (length registers))
- (append registers (list 0))
- (die)))
- (t (if (= 3 (length registers)) registers (die))))))
- (list :op mnemonic :d (car registers) :s1 (cadr registers) :s2 (caddr registers)))))
-
-(defun extract-i-type (line i)
- line)
-
-(defun extract-j-type (line i)
- line)
+(esrap:defrule space
+ (+ (or #\Space #\Tab))
+ (:constant nil))
+
+;;; defines rules to parse an integer in various bases
+
+(esrap:defrule binary (and #\0 #\b (+ (or "0" "1")))
+ (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 2)))
+
+(esrap:defrule octal (and #\0 #\o (+ (or (esrap:character-ranges (#\0 #\7)))))
+ (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 8)))
+
+(esrap:defrule decimal (+ (or (esrap:character-ranges (#\0 #\9))))
+ (:lambda (list) (parse-integer (esrap:text list) :radix 10)))
+
+(esrap:defrule hex (and #\0 #\x (+ (or (esrap:character-ranges (#\0 #\9))
+ "a" "b" "c" "d" "e" "f"
+ "A" "B" "C" "D" "E" "F")))
+ (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 16)))
+
+(esrap:defrule int (or binary octal hex decimal))
+
+;;; defines rules to parse an operand
+
+(esrap:defrule register (and #\$ (or int reg-id))
+ (:lambda (list) (list 'rr (cadr list))))
+
+(esrap:defrule dereference (and (esrap:? (or #\+ #\-)) int #\( register #\))
+ (:destructure (s i1 w1 r w2)
+ (declare (ignore w1 w2))
+ (list r (if (and s (string= s "-")) (- i1) i1))))
+
+;;; defines rules to parse labels
+
+(esrap:defrule label (+ (alphanumericp character))
+ (:lambda (list) (list 'l (esrap:text list))))
+
+(esrap:defrule label-decl (and label #\:)
+ (:destructure (l w)
+ (declare (ignore w))
+ l))
+
+;;; defines rules to parse instruction types
+
+(esrap:defrule r-type-1-m (or "ADDV" "SUBV" "MULV" "DIVV" "ADD" "SUB" "MUL"
+ "QUOT" "REM" "SFTR" "SFTL" "AND" "OR" "NOT" "XOR" ))
+(esrap:defrule r-type-2-m "NOT")
+(esrap:defrule r-type-3-m (or "CMP" "CEV"))
+(esrap:defrule i-type-1-m (or "LOADV" "LOAD"))
+(esrap:defrule i-type-2-m (or "STOREV" "STORE"))
+(esrap:defrule i-type-3-m (or "ADDI" "SUBI" "SFTRI" "SFTLI" "ANDI" "ORI" "XORI"))
+(esrap:defrule j-type-1-m (or "JMP" "JAL"))
+(esrap:defrule j-type-2-m (or "JRL" "BEQ" "BGT" "BUF" "BOF"))
+(esrap:defrule j-type-3-m (or "PUSH" "POP"))
+
+(defmacro defrule-instr (name type-id order &rest destructure-pattern)
+ (let* ((pattern-size (length destructure-pattern))
+ (spaces (mapcar (lambda (x) (read-from-string (format nil "w~A" x))) (util:iota pattern-size)))
+ (vars (mapcar (lambda (x) (read-from-string (format nil "s~A" x))) (util:iota pattern-size))))
+ `(esrap:defrule ,name
+ (and ,(read-from-string (format nil "~A-m" name)) ,@(util:riffle (make-list pattern-size :initial-element 'space) destructure-pattern))
+ (:destructure (m ,@(util:riffle spaces vars))
+ (declare (ignore ,@spaces))
+ (list ,type-id m ,@(mapcar (lambda (x) (or (nth x vars) ''(rr 0))) order))))))
+
+(defrule-instr r-type-1 'r (1 2 0) register register register)
+(defrule-instr r-type-2 'r (1 2 0) register register)
+(defrule-instr r-type-3 'r (0 1 2) register register)
+;;(defrule-instr j-type-1 'j (0 1) dereference)
+(defrule-instr j-type-2 'j (1 0) label)
+(defrule-instr j-type-3 'j (0 1) register)
+
+;; (esrap:defrule i-type-1 (and i-type-1-m space register space dereference)
+;; (:destructure (m w1 s w2 di)
+;; (declare (ignore w1 w2))
+;; `(i ,m ,s ,@di)))
+;; (esrap:defrule i-type-2 (and i-type-2-m space register space dereference)
+;; (:destructure (m w1 s w2 di)
+;; (declare (ignore w1 w2))
+;; `(i ,m ,@(util:insert-in-middle di s))))
+;; (esrap:defrule i-type-3 (and i-type-3-m space register space register space int)
+;; (:destructure (m w1 s1 w2 s2 w3 i)
+;; (declare (ignore w1 w2 w3))
+;; (list i m s1 s2 i)))
+;; (esrap:defrule j-type-1 (and j-type-1-m space dereference)
+;; (:destructure (m w di)
+;; (declare (ignore w))
+;; `(j ,m ,@di)))
+;; (esrap:defrule j-type-2 (and j-type-2-m space label)
+;; (:destructure (m w label)
+;; (declare (ignore w))
+;; (list j m "00000" label)))
+;; (esrap:defrule j-type-3 (and j-type-3-m space register)
+;; (:destructure (m w r)
+;; (declare (ignore w))
+;; (list j m r "00000")))
+
+(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 j-type-2 j-type-3))
+
+
+;; (esrap:parse 'register "$3")
+
+;; (esrap:parse 'j-type-1 "JMP 3($3)")
+;; (esrap:parse 'j-type-2 "JRL FOO")
+;; (esrap:parse 'j-type-3 "PUSH $1")
diff --git a/src/util.lisp b/src/util.lisp
index 027a770..93db659 100644
--- a/src/util.lisp
+++ b/src/util.lisp
@@ -11,6 +11,25 @@
(declare (type (integer 0 *) len))
(format nil "~V,'0b" len num))
+(defun insert-in-middle (list element)
+ (append (list (car list)) (list element) (cdr list)))
+
+(defun iota (n)
+ "Generates a number sequence from 0 to N."
+ (when (> n 0)
+ (do ((i 0 (1+ i))
+ (item 0 (1+ item))
+ (result nil (push item result)))
+ ((= i n) (nreverse result)))))
+
+(defun riffle (lst1 lst2)
+ "Given LST1 and LST2, returns a new list which is the an alternative sequence
+of the elements from both lists. Returns nil if the lists are not equal size."
+ (when (eq (length lst1) (length lst2))
+ (loop for l1 in lst1
+ for l2 in lst2
+ append (list l1 l2))))
+
(defparameter type-r
'(ADD SUB MUL QUOT REM SFTR SFTL AND OR NOT XOR ADDV SUBV MULV DIVV CMP CEV)
"R-type instructions.")
diff --git a/t/lex.lisp b/t/lex.lisp
index 7a20608..98d7c28 100644
--- a/t/lex.lisp
+++ b/t/lex.lisp
@@ -20,6 +20,10 @@
(is (eq (lex:read-token) 'lex::nl))))
(test read-token-reads-left-paren
+ (read-this "."
+ (is (eq (lex:read-token) 'lex::period))))
+
+(test read-token-reads-left-paren
(read-this "("
(is (eq (lex:read-token) 'lex::left-paren))))
diff --git a/t/parse.lisp b/t/parse.lisp
index 7187292..77db380 100644
--- a/t/parse.lisp
+++ b/t/parse.lisp
@@ -1,76 +1,58 @@
(in-package #:rva-tests)
-(defmacro expect-parse-error (body)
- `(handler-case
- (progn ,body
- (fail))
- (parse::parser-error ())))
-
(def-suite parse-tests
:description "Test functions exported from the parser."
:in all-tests)
-(in-suite parse-tests)
-
-(test extract-label-is-a-label
- (is (not (parse:extract-label '("LOOP" lex::colon)))))
-
-(test extract-label-not-a-label-one
- (let ((lst '("NICE" "TRY")))
- (is (equal lst
- (parse:extract-label lst)))))
+;;; these tests are not exhaustive, and are meant to test basic functionality
+;;; under correct circumstances.
-(test extract-label-not-a-label-two
- (let ((lst '("LOOP" lex::colon lex::colon)))
- (is (equal lst
- (parse:extract-label lst)))))
-
-(test extract-line-invalid-type
- (expect-parse-error (parse:tokens->ast '(("foo" lex::dollar)))))
-
-(test to-register-nil
- (expect-parse-error (parse:to-register '())))
-
-(test to-register-singleton
- (expect-parse-error (parse:to-register '(lex::dollar))))
+(in-suite parse-tests)
-(test to-register-zero
- (is (= 0 (parse:to-register '(lex::dollar 0)))))
+(test esrap-register-decimal-ten
+ (is (equal (list 'parse::rr 10)
+ (esrap:parse 'parse::register "$10"))))
-(test to-register-one
- (is (= 1 (parse:to-register '(lex::dollar 1)))))
+(test esrap-register-binary-ten
+ (is (equal (list 'parse::rr 10)
+ (esrap:parse 'parse::register "$0b1010"))))
-(test to-register-twenty-three
- (is (= 23 (parse:to-register '(lex::dollar 23)))))
+(test esrap-register-octal-ten
+ (is (equal (list 'parse::rr 10)
+ (esrap:parse 'parse::register "$0o12"))))
-(test to-register-zero-named
- (is (= 0 (parse:to-register '(lex::dollar "zr")))))
+(test esrap-register-hex-ten
+ (is (equal (list 'parse::rr 10)
+ (esrap:parse 'parse::register "$0xa"))))
-(test to-register-twenty-four
- (expect-parse-error (parse:to-register '(lex::dollar 24))))
+(test esrap-r-type-1
+ (is (equal '(parse::r "ADD" (parse::rr 5) (parse::rr 8) (parse::rr 1))
+ (esrap:parse 'parse:instr "ADD $1 $5 $8"))))
-(test to-register-negative-one
- (expect-parse-error (parse:to-register '(lex::dollar -1))))
+(test esrap-r-type-2
+ (is (equal '(parse::r "NOT" (parse::rr 5) (parse::rr 0) (parse::rr 1))
+ (esrap:parse 'parse:instr "NOT $1 $5"))))
-(test extract-r-type-no-registers
- (expect-parse-error (parse:extract-r-type '("add") 0)))
+(test esrap-r-type-3
+ (is (equal '(parse::r "CMP" (parse::rr 1) (parse::rr 5) (parse::rr 0))
+ (esrap:parse 'parse:instr "CMP $1 $5"))))
-(test extract-r-type-two-registers
- (expect-parse-error (parse:extract-r-type '("add" lex::dollar 2 lex::dollar 3) 0)))
+;; (test esrap-i-type-1
+;; (is (equal (list 'parse::i "LOAD" (list 'parse::rr 8) (list 'parse::rr 9) (list 'parse::r 1))
+;; (esrap:parse 'parse:instr "LOAD $8 1($9)"))))
-(test extract-r-type-cmp-three-registers
- (expect-parse-error (parse:extract-r-type '("cmp" lex::dollar 2
- lex::dollar 3 lex::dollar 4) 0)))
+;; (test esrap-i-type-2
+;; (is (equal (list 'parse::i "STORE" (list 'parse::rr 3) (list 'parse::rr 5) (list 'parse::rr 3))
+;; (esrap:parse 'parse:instr "STORE $5 3($3)"))))
-(test extract-r-type-simple-add
- (is (equal '(:op util::ADD :d 2 :s1 3 :s2 4)
- (parse:extract-r-type '("add" lex::dollar 2
- lex::dollar 3 lex::dollar 4) 0))))
+;; (test esrap-i-type-3
+;; (is (equal (list 'parse::i "ORI" (list 'parse::rr 5) (list 'parse::rr 4) (list 'parse::r 2))
+;; (esrap:parse 'parse:instr "ORI $5 $4 2"))))
-(test extract-r-type-simple-not
- (is (equal '(:op util::NOT :d 2 :s1 3 :s2 0)
- (parse:extract-r-type '("not" lex::dollar 2 lex::dollar 3) 0))))
+(test esrap-j-type-2
+ (is (equal '(parse::j "JRL" (parse::rr 0) (parse::l "FOO"))
+ (esrap:parse 'parse:instr "JRL FOO"))))
-(test extract-r-type-simple-cmp
- (is (equal '(:op util::CMP :d 0 :s1 2 :s2 3)
- (parse:extract-r-type '("cmp" lex::dollar 2 lex::dollar 3) 0))))
+(test esrap-j-type-3
+ (is (equal '(parse::j "PUSH" (parse::rr 1) 0)
+ (esrap:parse 'parse:instr "PUSH $1"))))