From 4df084d3e2785412eb086fb02ac10be5def695d1 Mon Sep 17 00:00:00 2001 From: bd Date: Wed, 9 Apr 2025 09:28:22 -0400 Subject: Remove trivia --- rva.asd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'rva.asd') diff --git a/rva.asd b/rva.asd index 7fdb00b..83dc9fe 100644 --- a/rva.asd +++ b/rva.asd @@ -9,8 +9,7 @@ :description "Assembler for the RISC-V[ECTOR] mini-ISA." :source-control (:git "git@github.com:bdunahu/rva.git") :depends-on (:uiop - :clingon - :trivia) + :clingon) :components ((:module "src" :serial t :components ((:file "package") -- cgit v1.2.3 From 6ba1871c3825e17d33b96ffd3051239dfe18d61a Mon Sep 17 00:00:00 2001 From: bd Date: Wed, 9 Apr 2025 19:26:51 -0400 Subject: Saving first part of large rewrite --- rva.asd | 3 +- src/lex.lisp | 2 + src/package.lisp | 15 ++-- src/parse.lisp | 204 ++++++++++++++++++++++++++++--------------------------- src/util.lisp | 19 ++++++ t/lex.lisp | 4 ++ t/parse.lisp | 96 +++++++++++--------------- 7 files changed, 178 insertions(+), 165 deletions(-) (limited to 'rva.asd') diff --git a/rva.asd b/rva.asd index 83dc9fe..dd904fb 100644 --- a/rva.asd +++ b/rva.asd @@ -9,7 +9,8 @@ :description "Assembler for the RISC-V[ECTOR] mini-ISA." :source-control (:git "git@github.com:bdunahu/rva.git") :depends-on (:uiop - :clingon) + :clingon + :esrap) :components ((:module "src" :serial t :components ((:file "package") diff --git a/src/lex.lisp b/src/lex.lisp index 5b1457d..c86d17c 100644 --- a/src/lex.lisp +++ b/src/lex.lisp @@ -42,6 +42,8 @@ Comments start with a semi-colon ';' and all tokens after are ignored." (progn (read-line *standard-input* nil) 'nl)) + ((char= chr #\.) 'period) + ((char= chr #\() 'left-paren) ((char= chr #\)) 'right-paren) diff --git a/src/package.lisp b/src/package.lisp index 7a14bd2..b856e38 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -6,10 +6,9 @@ (:use #:cl) (:export #:asm-extension? #:format-as-binary - #:type-r - #:type-i - #:type-j - #:label-loc)) + #:insert-in-middle + #:iota + #:riffle)) (defpackage #:lex (:use #:cl) @@ -22,7 +21,7 @@ (:use #:cl) (:export #:parser-error #:tokens->ast - #:to-register - ;; exported for testing only - #:extract-label - #:extract-r-type)) + ;; exported for testing only + #:register + #:instr + )) diff --git a/src/parse.lisp b/src/parse.lisp index 37415c8..54b0709 100644 --- a/src/parse.lisp +++ b/src/parse.lisp @@ -1,101 +1,107 @@ (in-package #:parse) -(defparameter reg-loc - '(("zr" . 0) ("lk" . 1) - ("sp" . 2) ("cc" . 3) - ("vl" . 4) ("fp" . 5)) - "A symbol table mapping register aliases to identifiers. If you want to add -a new alias, do it here.") - -(defparameter label-loc '() - "A symbol table mapping label names to line indices.") - -(define-condition parser-error (error) - ((message :initarg :message - :initform nil - :reader message)) - (:report (lambda (condition stream) - (format stream "~A" (message condition)))) - (:documentation "Dedicated error for an invalid parse.")) - -(defun to-register (tokens) - "Attempts to parse a register from the start of TOKENS. If it is badly formed, -throws a PARSER-ERROR." - (or (and (equal (car tokens) 'LEX::DOLLAR) - (cadr tokens) - (let ((r (cadr tokens))) - (cond ((stringp r) (cdr (assoc r reg-loc :test #'string=))) - ((numberp r) (and (<= 0 r 23) r))))) - (error 'parser-error - :message - (format nil "PARSE failed--Expected register, got ~a.~%" - (subseq tokens 0 (min 2 (length tokens))))))) - -(defun tokens->ast (program) - "Given PROGRAM, which is a list of lists of symbols, -filters out the labels and parses." - ;; TODO add directives - (let ((program (remove nil (mapcar #'extract-label program))) - (i 0)) - (mapcar (lambda (l) (extract-instruction l i)) program))) - -(let ((i 0)) - (defun extract-label (line) - "Given a series of tokens LINE, determines if LINE is -in the form STRING {colon}. If it is, then it is treated as a -label, and pushed onto the stack with the line index. - -Note that this function is intended to be called using mapcar, -so that labels can be added to a map and otherwise removed from -processing." - (if (and (equal 2 (length line)) - (stringp (car line)) - (equal 'lex::colon (cadr line))) - (progn (push (cons (read-from-string (car line)) i) label-loc) nil) - (progn (incf i) line)))) - -(defun extract-instruction (line i) - "Given instruction LINE, determines the expected type format and passes -LINE and the index I to the the respective function." - (let* ((mnemonic (intern (string-upcase (car line)) :util)) - ;; TODO add pseudo-ops (i.e., nop, leave, ret...) - ;; should probably be their own extract function - (type-fn (cond - ((member mnemonic util:type-r) #'extract-r-type) - ((member mnemonic util:type-i) #'extract-i-type) - ((member mnemonic util:type-j) #'extract-j-type)))) - (if type-fn - (funcall type-fn line i) - (error 'parser-error - :message - (format nil "PARSE failed--~a is not a known keyword.~%" mnemonic))))) - -(defun extract-r-type (line i) - (let ((mnemonic (intern (string-upcase (car line)) :util))) - (defun die () - (error 'parser-error - :message - (format nil "PARSE failed---Incorrect number of operands for ~a" mnemonic))) - (defun eat-registers (registers-so-far lst) - (if (not (null lst)) - (eat-registers (cons (to-register lst) registers-so-far) - (cddr lst)) - (reverse registers-so-far))) - (let* ((registers (eat-registers '() (cdr line))) - ;; handle special cases - (registers (cond ((member mnemonic '(util::CMP util::CEV)) - (if (= 2 (length registers)) - (cons 0 registers) - (die))) - ((member mnemonic '(util::NOT)) - (if (= 2 (length registers)) - (append registers (list 0)) - (die))) - (t (if (= 3 (length registers)) registers (die)))))) - (list :op mnemonic :d (car registers) :s1 (cadr registers) :s2 (caddr registers))))) - -(defun extract-i-type (line i) - line) - -(defun extract-j-type (line i) - line) +(esrap:defrule space + (+ (or #\Space #\Tab)) + (:constant nil)) + +;;; defines rules to parse an integer in various bases + +(esrap:defrule binary (and #\0 #\b (+ (or "0" "1"))) + (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 2))) + +(esrap:defrule octal (and #\0 #\o (+ (or (esrap:character-ranges (#\0 #\7))))) + (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 8))) + +(esrap:defrule decimal (+ (or (esrap:character-ranges (#\0 #\9)))) + (:lambda (list) (parse-integer (esrap:text list) :radix 10))) + +(esrap:defrule hex (and #\0 #\x (+ (or (esrap:character-ranges (#\0 #\9)) + "a" "b" "c" "d" "e" "f" + "A" "B" "C" "D" "E" "F"))) + (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 16))) + +(esrap:defrule int (or binary octal hex decimal)) + +;;; defines rules to parse an operand + +(esrap:defrule register (and #\$ (or int reg-id)) + (:lambda (list) (list 'rr (cadr list)))) + +(esrap:defrule dereference (and (esrap:? (or #\+ #\-)) int #\( register #\)) + (:destructure (s i1 w1 r w2) + (declare (ignore w1 w2)) + (list r (if (and s (string= s "-")) (- i1) i1)))) + +;;; defines rules to parse labels + +(esrap:defrule label (+ (alphanumericp character)) + (:lambda (list) (list 'l (esrap:text list)))) + +(esrap:defrule label-decl (and label #\:) + (:destructure (l w) + (declare (ignore w)) + l)) + +;;; defines rules to parse instruction types + +(esrap:defrule r-type-1-m (or "ADDV" "SUBV" "MULV" "DIVV" "ADD" "SUB" "MUL" + "QUOT" "REM" "SFTR" "SFTL" "AND" "OR" "NOT" "XOR" )) +(esrap:defrule r-type-2-m "NOT") +(esrap:defrule r-type-3-m (or "CMP" "CEV")) +(esrap:defrule i-type-1-m (or "LOADV" "LOAD")) +(esrap:defrule i-type-2-m (or "STOREV" "STORE")) +(esrap:defrule i-type-3-m (or "ADDI" "SUBI" "SFTRI" "SFTLI" "ANDI" "ORI" "XORI")) +(esrap:defrule j-type-1-m (or "JMP" "JAL")) +(esrap:defrule j-type-2-m (or "JRL" "BEQ" "BGT" "BUF" "BOF")) +(esrap:defrule j-type-3-m (or "PUSH" "POP")) + +(defmacro defrule-instr (name type-id order &rest destructure-pattern) + (let* ((pattern-size (length destructure-pattern)) + (spaces (mapcar (lambda (x) (read-from-string (format nil "w~A" x))) (util:iota pattern-size))) + (vars (mapcar (lambda (x) (read-from-string (format nil "s~A" x))) (util:iota pattern-size)))) + `(esrap:defrule ,name + (and ,(read-from-string (format nil "~A-m" name)) ,@(util:riffle (make-list pattern-size :initial-element 'space) destructure-pattern)) + (:destructure (m ,@(util:riffle spaces vars)) + (declare (ignore ,@spaces)) + (list ,type-id m ,@(mapcar (lambda (x) (or (nth x vars) ''(rr 0))) order)))))) + +(defrule-instr r-type-1 'r (1 2 0) register register register) +(defrule-instr r-type-2 'r (1 2 0) register register) +(defrule-instr r-type-3 'r (0 1 2) register register) +;;(defrule-instr j-type-1 'j (0 1) dereference) +(defrule-instr j-type-2 'j (1 0) label) +(defrule-instr j-type-3 'j (0 1) register) + +;; (esrap:defrule i-type-1 (and i-type-1-m space register space dereference) +;; (:destructure (m w1 s w2 di) +;; (declare (ignore w1 w2)) +;; `(i ,m ,s ,@di))) +;; (esrap:defrule i-type-2 (and i-type-2-m space register space dereference) +;; (:destructure (m w1 s w2 di) +;; (declare (ignore w1 w2)) +;; `(i ,m ,@(util:insert-in-middle di s)))) +;; (esrap:defrule i-type-3 (and i-type-3-m space register space register space int) +;; (:destructure (m w1 s1 w2 s2 w3 i) +;; (declare (ignore w1 w2 w3)) +;; (list i m s1 s2 i))) +;; (esrap:defrule j-type-1 (and j-type-1-m space dereference) +;; (:destructure (m w di) +;; (declare (ignore w)) +;; `(j ,m ,@di))) +;; (esrap:defrule j-type-2 (and j-type-2-m space label) +;; (:destructure (m w label) +;; (declare (ignore w)) +;; (list j m "00000" label))) +;; (esrap:defrule j-type-3 (and j-type-3-m space register) +;; (:destructure (m w r) +;; (declare (ignore w)) +;; (list j m r "00000"))) + +(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 j-type-2 j-type-3)) + + +;; (esrap:parse 'register "$3") + +;; (esrap:parse 'j-type-1 "JMP 3($3)") +;; (esrap:parse 'j-type-2 "JRL FOO") +;; (esrap:parse 'j-type-3 "PUSH $1") diff --git a/src/util.lisp b/src/util.lisp index 027a770..93db659 100644 --- a/src/util.lisp +++ b/src/util.lisp @@ -11,6 +11,25 @@ (declare (type (integer 0 *) len)) (format nil "~V,'0b" len num)) +(defun insert-in-middle (list element) + (append (list (car list)) (list element) (cdr list))) + +(defun iota (n) + "Generates a number sequence from 0 to N." + (when (> n 0) + (do ((i 0 (1+ i)) + (item 0 (1+ item)) + (result nil (push item result))) + ((= i n) (nreverse result))))) + +(defun riffle (lst1 lst2) + "Given LST1 and LST2, returns a new list which is the an alternative sequence +of the elements from both lists. Returns nil if the lists are not equal size." + (when (eq (length lst1) (length lst2)) + (loop for l1 in lst1 + for l2 in lst2 + append (list l1 l2)))) + (defparameter type-r '(ADD SUB MUL QUOT REM SFTR SFTL AND OR NOT XOR ADDV SUBV MULV DIVV CMP CEV) "R-type instructions.") diff --git a/t/lex.lisp b/t/lex.lisp index 7a20608..98d7c28 100644 --- a/t/lex.lisp +++ b/t/lex.lisp @@ -19,6 +19,10 @@ " (is (eq (lex:read-token) 'lex::nl)))) +(test read-token-reads-left-paren + (read-this "." + (is (eq (lex:read-token) 'lex::period)))) + (test read-token-reads-left-paren (read-this "(" (is (eq (lex:read-token) 'lex::left-paren)))) diff --git a/t/parse.lisp b/t/parse.lisp index 7187292..77db380 100644 --- a/t/parse.lisp +++ b/t/parse.lisp @@ -1,76 +1,58 @@ (in-package #:rva-tests) -(defmacro expect-parse-error (body) - `(handler-case - (progn ,body - (fail)) - (parse::parser-error ()))) - (def-suite parse-tests :description "Test functions exported from the parser." :in all-tests) -(in-suite parse-tests) - -(test extract-label-is-a-label - (is (not (parse:extract-label '("LOOP" lex::colon))))) - -(test extract-label-not-a-label-one - (let ((lst '("NICE" "TRY"))) - (is (equal lst - (parse:extract-label lst))))) +;;; these tests are not exhaustive, and are meant to test basic functionality +;;; under correct circumstances. -(test extract-label-not-a-label-two - (let ((lst '("LOOP" lex::colon lex::colon))) - (is (equal lst - (parse:extract-label lst))))) - -(test extract-line-invalid-type - (expect-parse-error (parse:tokens->ast '(("foo" lex::dollar))))) - -(test to-register-nil - (expect-parse-error (parse:to-register '()))) - -(test to-register-singleton - (expect-parse-error (parse:to-register '(lex::dollar)))) +(in-suite parse-tests) -(test to-register-zero - (is (= 0 (parse:to-register '(lex::dollar 0))))) +(test esrap-register-decimal-ten + (is (equal (list 'parse::rr 10) + (esrap:parse 'parse::register "$10")))) -(test to-register-one - (is (= 1 (parse:to-register '(lex::dollar 1))))) +(test esrap-register-binary-ten + (is (equal (list 'parse::rr 10) + (esrap:parse 'parse::register "$0b1010")))) -(test to-register-twenty-three - (is (= 23 (parse:to-register '(lex::dollar 23))))) +(test esrap-register-octal-ten + (is (equal (list 'parse::rr 10) + (esrap:parse 'parse::register "$0o12")))) -(test to-register-zero-named - (is (= 0 (parse:to-register '(lex::dollar "zr"))))) +(test esrap-register-hex-ten + (is (equal (list 'parse::rr 10) + (esrap:parse 'parse::register "$0xa")))) -(test to-register-twenty-four - (expect-parse-error (parse:to-register '(lex::dollar 24)))) +(test esrap-r-type-1 + (is (equal '(parse::r "ADD" (parse::rr 5) (parse::rr 8) (parse::rr 1)) + (esrap:parse 'parse:instr "ADD $1 $5 $8")))) -(test to-register-negative-one - (expect-parse-error (parse:to-register '(lex::dollar -1)))) +(test esrap-r-type-2 + (is (equal '(parse::r "NOT" (parse::rr 5) (parse::rr 0) (parse::rr 1)) + (esrap:parse 'parse:instr "NOT $1 $5")))) -(test extract-r-type-no-registers - (expect-parse-error (parse:extract-r-type '("add") 0))) +(test esrap-r-type-3 + (is (equal '(parse::r "CMP" (parse::rr 1) (parse::rr 5) (parse::rr 0)) + (esrap:parse 'parse:instr "CMP $1 $5")))) -(test extract-r-type-two-registers - (expect-parse-error (parse:extract-r-type '("add" lex::dollar 2 lex::dollar 3) 0))) +;; (test esrap-i-type-1 +;; (is (equal (list 'parse::i "LOAD" (list 'parse::rr 8) (list 'parse::rr 9) (list 'parse::r 1)) +;; (esrap:parse 'parse:instr "LOAD $8 1($9)")))) -(test extract-r-type-cmp-three-registers - (expect-parse-error (parse:extract-r-type '("cmp" lex::dollar 2 - lex::dollar 3 lex::dollar 4) 0))) +;; (test esrap-i-type-2 +;; (is (equal (list 'parse::i "STORE" (list 'parse::rr 3) (list 'parse::rr 5) (list 'parse::rr 3)) +;; (esrap:parse 'parse:instr "STORE $5 3($3)")))) -(test extract-r-type-simple-add - (is (equal '(:op util::ADD :d 2 :s1 3 :s2 4) - (parse:extract-r-type '("add" lex::dollar 2 - lex::dollar 3 lex::dollar 4) 0)))) +;; (test esrap-i-type-3 +;; (is (equal (list 'parse::i "ORI" (list 'parse::rr 5) (list 'parse::rr 4) (list 'parse::r 2)) +;; (esrap:parse 'parse:instr "ORI $5 $4 2")))) -(test extract-r-type-simple-not - (is (equal '(:op util::NOT :d 2 :s1 3 :s2 0) - (parse:extract-r-type '("not" lex::dollar 2 lex::dollar 3) 0)))) +(test esrap-j-type-2 + (is (equal '(parse::j "JRL" (parse::rr 0) (parse::l "FOO")) + (esrap:parse 'parse:instr "JRL FOO")))) -(test extract-r-type-simple-cmp - (is (equal '(:op util::CMP :d 0 :s1 2 :s2 3) - (parse:extract-r-type '("cmp" lex::dollar 2 lex::dollar 3) 0)))) +(test esrap-j-type-3 + (is (equal '(parse::j "PUSH" (parse::rr 1) 0) + (esrap:parse 'parse:instr "PUSH $1")))) -- cgit v1.2.3 From ce80845b5cb5b473327e4e561342490576d83a90 Mon Sep 17 00:00:00 2001 From: bd Date: Wed, 9 Apr 2025 20:25:57 -0400 Subject: Successfully parse every instruction type --- input/add-loop.asm | 13 ++++--- rva.asd | 4 +- src/lex.lisp | 110 ----------------------------------------------------- src/main.lisp | 21 ++-------- src/package.lisp | 12 +----- src/parse.lisp | 95 +++++++++++++++++++++++++-------------------- t/lex.lisp | 104 -------------------------------------------------- t/parse.lisp | 24 +++++++----- 8 files changed, 81 insertions(+), 302 deletions(-) delete mode 100644 src/lex.lisp delete mode 100644 t/lex.lisp (limited to 'rva.asd') diff --git a/input/add-loop.asm b/input/add-loop.asm index 6379831..f8d97a1 100644 --- a/input/add-loop.asm +++ b/input/add-loop.asm @@ -1,17 +1,18 @@ - addi $fp $0 0x200 +.text + addi $2 $0 0x200 addi $5 $0 0x1 - store $5 0($fp) + store $5 0($2) addi $5 $0 0x2 - store $5 1($fp) + store $5 1($2) addi $5 $0 0x3 - store $5 2($fp) + store $5 2($2) addi $5 $0 0x4 - store $5 3($fp) + store $5 3($2) addi $5 $0 0x0 addi $6 $0 0x3 jrl CHECK LOOP: - add $9 $fp $5 + add $9 $2 $5 load $7 -0($9) load $8 +1($9) add $7 $7 $8 diff --git a/rva.asd b/rva.asd index dd904fb..9271472 100644 --- a/rva.asd +++ b/rva.asd @@ -15,8 +15,7 @@ :serial t :components ((:file "package") (:file "util") - (:file "lex") - (:file "parse") + (:file "parse") (:file "main")))) :long-description #.(uiop:read-file-string @@ -37,7 +36,6 @@ :components ((:file "package") (:file "main") (:file "util") - (:file "lex") (:file "parse")))) :perform (test-op (o s) (uiop:symbol-call :rva-tests :test-rva))) diff --git a/src/lex.lisp b/src/lex.lisp deleted file mode 100644 index c86d17c..0000000 --- a/src/lex.lisp +++ /dev/null @@ -1,110 +0,0 @@ -(in-package #:lex) - -(define-condition lexer-error (error) - ((message :initarg :message - :initform nil - :reader message)) - (:report (lambda (condition stream) - (format stream "~A" (message condition)))) - (:documentation "Dedicated error for an invalid lex.")) - -(defun file->tokens (file) - "Opens FILE and parses returns a list of tokens, or -NIL if the file could not be opened." - - (defun read-instr (lst tokens-so-far) - "Collects tokens in FILE into TOKENS-SO-FAR, splitting on a newline." - (let ((token (read-token))) - (cond ((null token) (reverse tokens-so-far)) - ((eq token 'nl) - (cons (reverse tokens-so-far) (read-instr nil nil))) - (t (read-instr lst (cons token tokens-so-far)))))) - - (and (probe-file file) - (with-open-file (*standard-input* file :direction :input) - (remove nil (read-instr '() '()))))) - -(defun read-token () - "Reads *STANDARD-INPUT* and returns a token, or nil if the end -of file has been reached. -Whitespace, commas, colons, and parentheses are token delimiters. -Comments start with a semi-colon ';' and all tokens after are ignored." - (let ((chr (read-char *standard-input* nil))) - (cond - ((null chr) chr) - - ((char= chr #\linefeed) 'nl) - - ((whitespace-char-p chr) - (read-token)) - - ((char= chr #\;) - (progn (read-line *standard-input* nil) - 'nl)) - - ((char= chr #\.) 'period) - - ((char= chr #\() 'left-paren) - ((char= chr #\)) 'right-paren) - - ((char= chr #\:) 'colon) - ((char= chr #\$) 'dollar) - - ((char= chr #\+) 'plus) - ((char= chr #\-) 'minus) - - ((digit-char-p chr) - (read-immediate chr)) - - ((alpha-char-p chr) - (read-keyword chr)) - - (t (error 'lexer-error - :message - (format nil "LEX failled--~a is not a valid lexical symbol.~%" chr)))))) - -(defun read-immediate (chr) - "Reads a sequence of digits, in base 2, 8, 10, or 16.. Throws -`invalid-immediate-or-keyword' error if an alphabetic character is encountered." - ;; may be combined with read-keyword-helper - (defun read-immediate-helper (chrs-so-far) - (let ((chr (peek-char nil *standard-input* nil))) - (cond ((and (not (null chr)) (digit-char-p chr)) - (read-immediate-helper (cons (read-char *standard-input* nil) chrs-so-far))) - ((and (not (null chr)) (alpha-char-p chr)) - (error 'lexer-error - :message - (format nil "LEX failed--encountered ~a while reading immediate.~%" chr))) - (t (reverse chrs-so-far))))) - - (let* ((next (peek-char nil *standard-input* nil)) - (radix (cond ((null next) 10) - ((char= next #\b) 2) - ((char= next #\o) 8) - ((char= next #\x) 16) - ((alpha-char-p next) nil) - (t 10))) - (arg (list chr))) - (when (and (char= chr #\0) radix (not (= radix 10))) - (read-char *standard-input* nil) - (setq arg '())) - (parse-integer (coerce (read-immediate-helper arg) 'string) :radix radix))) - -(defun read-keyword (chr) - "Reads a sequence of alphabetic characters. Throws `invalid-immediate-or-keyword' -error if a digit is encountered." - ;; may be combined with read-immediate-helper - (defun read-keyword-helper (chrs-so-far) - (let ((chr (peek-char nil *standard-input* nil))) - (cond ((and (not (null chr)) (alpha-char-p chr)) - (read-keyword-helper (cons (read-char *standard-input* nil) chrs-so-far))) - ((and (not (null chr)) (digit-char-p chr)) - (error 'lexer-error - :message - (format nil "LEX failed--encountered ~a while reading keyword.~%" chr))) - (t (reverse chrs-so-far))))) - (coerce (read-keyword-helper (list chr)) 'string)) - -(defun whitespace-char-p (x) - (or (char= #\space x) - (not (graphic-char-p x)))) diff --git a/src/main.lisp b/src/main.lisp index 8a36a66..9180d57 100644 --- a/src/main.lisp +++ b/src/main.lisp @@ -17,13 +17,6 @@ _/_/ _/_/ " (defun cli/options () "Returns options for the `rva' assembler." (list - (clingon:make-option - :flag - :description "run the lexer, but stop before parsing" - :long-name "lex" - :short-name #\l - :required nil - :key :lex) (clingon:make-option :flag :description "run the parser, but stop before emission" @@ -37,24 +30,18 @@ _/_/ _/_/ " (print-splash) (let* ((args (clingon:command-arguments cmd)) (file (car args)) - (parse? (not (clingon:getopt cmd :lex))) - (emit? (not (clingon:getopt cmd :parse)))) + (emit? (not (clingon:getopt cmd :parse)))) (cond ;; complain about num arguments ((/= (length args) 1) (error "Wrong number of arguments.~%")) ((not (util:asm-extension? file)) (error "The file is not an asm source code file.~%")) - (t (let ((tokens (lex:file->tokens file))) - (if tokens - (progn (pprint tokens) + (t (let ((str (uiop:read-file-string file))) + (if str + (progn (pprint (esrap:parse 'parse:text (string-upcase str))) (terpri) (format t "---~%")) (error "The file does not exist, or it could not be opened.~%")) - (if parse? - (let ((tokens (parse:tokens->ast tokens))) - (progn (pprint tokens) - (terpri) - (format t "---~%")))) (format t "Nitimur in Vetitum~%")))))) diff --git a/src/package.lisp b/src/package.lisp index b856e38..2b01d15 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -10,18 +10,10 @@ #:iota #:riffle)) -(defpackage #:lex - (:use #:cl) - (:export #:lexer-error - #:file->tokens - ;; exported for testing only - #:read-token)) - (defpackage #:parse (:use #:cl) - (:export #:parser-error - #:tokens->ast - ;; exported for testing only + (:export ;; exported for testing only + #:text #:register #:instr )) diff --git a/src/parse.lisp b/src/parse.lisp index 54b0709..a92eae7 100644 --- a/src/parse.lisp +++ b/src/parse.lisp @@ -1,22 +1,24 @@ (in-package #:parse) (esrap:defrule space - (+ (or #\Space #\Tab)) + (+ (or #\space #\tab)) (:constant nil)) +(esrap:defrule newline + (+ #\newline)) + ;;; defines rules to parse an integer in various bases -(esrap:defrule binary (and #\0 #\b (+ (or "0" "1"))) +(esrap:defrule binary (and #\0 #\B (+ (or "0" "1"))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 2))) -(esrap:defrule octal (and #\0 #\o (+ (or (esrap:character-ranges (#\0 #\7))))) +(esrap:defrule octal (and #\0 #\O (+ (or (esrap:character-ranges (#\0 #\7))))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 8))) (esrap:defrule decimal (+ (or (esrap:character-ranges (#\0 #\9)))) (:lambda (list) (parse-integer (esrap:text list) :radix 10))) -(esrap:defrule hex (and #\0 #\x (+ (or (esrap:character-ranges (#\0 #\9)) - "a" "b" "c" "d" "e" "f" +(esrap:defrule hex (and #\0 #\X (+ (or (esrap:character-ranges (#\0 #\9)) "A" "B" "C" "D" "E" "F"))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 16))) @@ -30,7 +32,10 @@ (esrap:defrule dereference (and (esrap:? (or #\+ #\-)) int #\( register #\)) (:destructure (s i1 w1 r w2) (declare (ignore w1 w2)) - (list r (if (and s (string= s "-")) (- i1) i1)))) + (list r (list 'imm (if (and s (string= s "-")) (- i1) i1))))) + +(esrap:defrule immediate int + (:lambda (i) (list 'imm i))) ;;; defines rules to parse labels @@ -56,6 +61,11 @@ (esrap:defrule j-type-3-m (or "PUSH" "POP")) (defmacro defrule-instr (name type-id order &rest destructure-pattern) + "Defines the boilerplate for a common esrap instruction rule. +NAME is the name of the non-terminal symbol. +TYPE-ID is the symbol which appears as the first element of a successful parse. +ORDER is the order to place the parsed tokens in the resulting list. +DESTRUCTURE-PATTERN is the list of non-terminals on the right side of the grammar rule." (let* ((pattern-size (length destructure-pattern)) (spaces (mapcar (lambda (x) (read-from-string (format nil "w~A" x))) (util:iota pattern-size))) (vars (mapcar (lambda (x) (read-from-string (format nil "s~A" x))) (util:iota pattern-size)))) @@ -68,40 +78,41 @@ (defrule-instr r-type-1 'r (1 2 0) register register register) (defrule-instr r-type-2 'r (1 2 0) register register) (defrule-instr r-type-3 'r (0 1 2) register register) -;;(defrule-instr j-type-1 'j (0 1) dereference) + +(esrap:defrule i-type-1 (and i-type-1-m space register space dereference) + (:destructure (m w1 s w2 di) + (declare (ignore w1 w2)) + `(i ,m ,s ,@di))) + +(esrap:defrule i-type-2 (and i-type-2-m space register space dereference) + (:destructure (m w1 s w2 di) + (declare (ignore w1 w2)) + `(i ,m ,@(util:insert-in-middle di s)))) + +(defrule-instr i-type-3 'i (0 1 2) register register immediate) +(esrap:defrule j-type-1 (and j-type-1-m space dereference) + (:destructure (m w di) + (declare (ignore w)) + `(j ,m ,@di))) + (defrule-instr j-type-2 'j (1 0) label) -(defrule-instr j-type-3 'j (0 1) register) - -;; (esrap:defrule i-type-1 (and i-type-1-m space register space dereference) -;; (:destructure (m w1 s w2 di) -;; (declare (ignore w1 w2)) -;; `(i ,m ,s ,@di))) -;; (esrap:defrule i-type-2 (and i-type-2-m space register space dereference) -;; (:destructure (m w1 s w2 di) -;; (declare (ignore w1 w2)) -;; `(i ,m ,@(util:insert-in-middle di s)))) -;; (esrap:defrule i-type-3 (and i-type-3-m space register space register space int) -;; (:destructure (m w1 s1 w2 s2 w3 i) -;; (declare (ignore w1 w2 w3)) -;; (list i m s1 s2 i))) -;; (esrap:defrule j-type-1 (and j-type-1-m space dereference) -;; (:destructure (m w di) -;; (declare (ignore w)) -;; `(j ,m ,@di))) -;; (esrap:defrule j-type-2 (and j-type-2-m space label) -;; (:destructure (m w label) -;; (declare (ignore w)) -;; (list j m "00000" label))) -;; (esrap:defrule j-type-3 (and j-type-3-m space register) -;; (:destructure (m w r) -;; (declare (ignore w)) -;; (list j m r "00000"))) - -(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 j-type-2 j-type-3)) - - -;; (esrap:parse 'register "$3") - -;; (esrap:parse 'j-type-1 "JMP 3($3)") -;; (esrap:parse 'j-type-2 "JRL FOO") -;; (esrap:parse 'j-type-3 "PUSH $1") +(esrap:defrule j-type-3 (and j-type-3-m space register) + (:destructure (m w r) + (declare (ignore w)) + `(j ,m ,r (imm 0)))) + +(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 i-type-1 i-type-2 + i-type-3 j-type-1 j-type-2 j-type-3 label-decl)) + +;;; defines rules to parse the .text segment + +(esrap:defrule instr-clean (and (esrap:? space) instr newline) + (:destructure (w1 i w2) + (declare (ignore nl l)) + i)) + +(esrap:defrule text (and ".TEXT" newline + (* instr-clean)) + (:destructure (txt nl is) + (declare (ignore txt nl)) + (list 'text is))) diff --git a/t/lex.lisp b/t/lex.lisp deleted file mode 100644 index 98d7c28..0000000 --- a/t/lex.lisp +++ /dev/null @@ -1,104 +0,0 @@ -(in-package #:rva-tests) - -(defmacro read-this (str &body body) - `(let ((*standard-input* (make-string-input-stream ,str))) - ,@body)) - -(def-suite lex-tests - :description "Test functions exported from the lexer." - :in all-tests) - -(in-suite lex-tests) - -(test read-token-reads-eof - (read-this "" - (is (not (lex:read-token))))) - -(test read-token-reads-nl - (read-this " -" - (is (eq (lex:read-token) 'lex::nl)))) - -(test read-token-reads-left-paren - (read-this "." - (is (eq (lex:read-token) 'lex::period)))) - -(test read-token-reads-left-paren - (read-this "(" - (is (eq (lex:read-token) 'lex::left-paren)))) - -(test read-token-reads-right-paren - (read-this ")" - (is (eq (lex:read-token) 'lex::right-paren)))) - -(test read-token-reads-left-paren - (read-this "$" - (is (eq (lex:read-token) 'lex::dollar)))) - -(test read-token-reads-plus - (read-this "+" - (is (eq (lex:read-token) 'lex::plus)))) - -(test read-token-reads-minus - (read-this "-" - (is (eq (lex:read-token) 'lex::minus)))) - -(test read-token-ignores-space - (read-this " (" - (is (eq (lex:read-token) 'lex::left-paren)))) - -(test read-token-ignores-tab - (read-this " (" - (is (eq (lex:read-token) 'lex::left-paren)))) - -(test read-token-ignores-comment - (read-this "; this is a comment -(" - (is (eq (lex:read-token) 'lex::nl)))) - -(test read-token-immediate-zero - (read-this "0" - (is (= (lex:read-token) 0)))) - -(test read-token-immediate-all-digits - (read-this "123456789" - (is (= (lex:read-token) 123456789)))) - -(test read-token-immediate-binary - (read-this "0b00101010" - (is (= (lex:read-token) 42)))) - -(test read-token-immediate-octal - (read-this "0o052" - (is (= (lex:read-token) 42)))) - -(test read-token-immediate-hexadecimal - (read-this "0x200" - (is (= (lex:read-token) 512)))) - -(test read-token-immediate-invalid-immediate - (handler-case - (progn (read-this "0v0" (lex:read-token)) - (fail)) - (lex:lexer-error ()))) - -;; do we want a custom error for this too? -(test read-token-immediate-radix - (handler-case - (progn (read-this "0x" (lex:read-token)) - (fail)) - (sb-int:simple-parse-error ()))) - -(test read-token-keyword-single - (read-this "a" - (is (string= (lex:read-token) "a")))) - -(test read-token-keyword-add - (read-this "addi" - (is (string= (lex:read-token) "addi")))) - -(test read-token-immediate-invalid-keyword - (handler-case - (progn (read-this "sub0" (lex:read-token)) - (fail)) - (lex:lexer-error ()))) diff --git a/t/parse.lisp b/t/parse.lisp index 77db380..a9de6db 100644 --- a/t/parse.lisp +++ b/t/parse.lisp @@ -37,22 +37,26 @@ (is (equal '(parse::r "CMP" (parse::rr 1) (parse::rr 5) (parse::rr 0)) (esrap:parse 'parse:instr "CMP $1 $5")))) -;; (test esrap-i-type-1 -;; (is (equal (list 'parse::i "LOAD" (list 'parse::rr 8) (list 'parse::rr 9) (list 'parse::r 1)) -;; (esrap:parse 'parse:instr "LOAD $8 1($9)")))) +(test esrap-i-type-1 + (is (equal '(parse::i "LOAD" (parse::rr 8) (parse::rr 9) (parse::imm 1)) + (esrap:parse 'parse:instr "LOAD $8 1($9)")))) -;; (test esrap-i-type-2 -;; (is (equal (list 'parse::i "STORE" (list 'parse::rr 3) (list 'parse::rr 5) (list 'parse::rr 3)) -;; (esrap:parse 'parse:instr "STORE $5 3($3)")))) +(test esrap-i-type-2 + (is (equal '(parse::i "STORE" (parse::rr 3) (parse::rr 5) (parse::imm 3)) + (esrap:parse 'parse:instr "STORE $5 3($3)")))) -;; (test esrap-i-type-3 -;; (is (equal (list 'parse::i "ORI" (list 'parse::rr 5) (list 'parse::rr 4) (list 'parse::r 2)) -;; (esrap:parse 'parse:instr "ORI $5 $4 2")))) +(test esrap-i-type-3 + (is (equal '(parse::i "ORI" (parse::rr 5) (parse::rr 4) (parse::imm 2)) + (esrap:parse 'parse:instr "ORI $5 $4 2")))) + +(test esrap-j-type-1 + (is (equal '(parse::j "JMP" (parse::rr 3) (parse::imm 3)) + (esrap:parse 'parse:instr "JMP 3($3)")))) (test esrap-j-type-2 (is (equal '(parse::j "JRL" (parse::rr 0) (parse::l "FOO")) (esrap:parse 'parse:instr "JRL FOO")))) (test esrap-j-type-3 - (is (equal '(parse::j "PUSH" (parse::rr 1) 0) + (is (equal '(parse::j "PUSH" (parse::rr 1) (parse::imm 0)) (esrap:parse 'parse:instr "PUSH $1")))) -- cgit v1.2.3 From df508744ec2975cec0ba05e8a4358c1c41265c4c Mon Sep 17 00:00:00 2001 From: bd Date: Thu, 10 Apr 2025 03:53:43 -0400 Subject: Add untested (but works on the single input file) code emission --- rva.asd | 1 + src/emit.lisp | 66 ++++++++++++++++++++++++++ src/main.lisp | 7 ++- src/package.lisp | 11 +++-- src/parse.lisp | 42 ++++++++--------- src/util.lisp | 31 +++---------- t/parse.lisp | 139 +++++++++++++++++++++++++++++++------------------------ t/util.lisp | 16 ++++--- 8 files changed, 193 insertions(+), 120 deletions(-) create mode 100644 src/emit.lisp (limited to 'rva.asd') diff --git a/rva.asd b/rva.asd index 9271472..092fd44 100644 --- a/rva.asd +++ b/rva.asd @@ -16,6 +16,7 @@ :components ((:file "package") (:file "util") (:file "parse") + (:file "emit") (:file "main")))) :long-description #.(uiop:read-file-string diff --git a/src/emit.lisp b/src/emit.lisp new file mode 100644 index 0000000..acc2772 --- /dev/null +++ b/src/emit.lisp @@ -0,0 +1,66 @@ +(in-package #:emit) + +(defun fits-in-X-bits (n) + "Returns the number of bits required to represent N" + (ceiling (/ (log (ceiling n (log 2))) (log 2)))) + +(defmacro generate-type-map (ops) + "Generates an alist where the key corresponds to an element in +OPS, while the value is the index of that key (padded to the minimum +number of bits required to represent all +concatenated with TYPE." + `(let ((i 0) + (opsize (fits-in-X-bits (length ,ops)))) + (mapcar (lambda (x) + (incf i) + (cons x (util:format-as-binary i opsize))) + ,ops))) + +(defvar mnemonic-loc + `(,@(generate-type-map util:r-type) + ,@(generate-type-map util:i-type) + ,@(generate-type-map util:j-type)) + "An alist mapping known mnemonics to their binary representation.") + +(defun lookup-mnemonic (mnemonic) + (cdr (assoc mnemonic mnemonic-loc :test #'string=))) + +(defun p (d x) + (append x d)) + +(defun d (&rest lst) + (mapcar (lambda (x) (util:format-as-binary x 32)) lst)) + +(defun x (&rest lst) + lst) + +(defun r (mnemonic s1 s2 d) + (concatenate + 'string (format nil "~10,'0d" 0) d s2 s1 (lookup-mnemonic mnemonic) "00")) + +(defun i (mnemonic s d i) + (concatenate + 'string (util:format-as-binary i 16) d s (lookup-mnemonic mnemonic) "01")) + +(defun j (mnemonic b d) + (concatenate + 'string (util:format-as-binary d 21) b (lookup-mnemonic mnemonic) "10")) + +(defun rr (val) + (if (<= 0 val 23) + (util:format-as-binary val 5) + (error (format nil "~a is not a valid register id!~%" val)))) + +(defun imm (val) val) + +(defun l (l s) + (let ((d (util:get-label l))) + (- d s))) + +(defun var (s) + (let ((pos (util:get-variable s))) + (+ pos parse:line-number))) + +(defun emit (p) + (format t "~a~%" p) + (eval p)) diff --git a/src/main.lisp b/src/main.lisp index 9692603..f20b022 100644 --- a/src/main.lisp +++ b/src/main.lisp @@ -38,10 +38,9 @@ _/_/ _/_/ " (error "The file is not an asm source code file.~%")) (t (let ((str (uiop:read-file-string file))) (if str - (progn (pprint (esrap:parse 'parse:str->ast (string-upcase str))) - (terpri) - (maphash #'(lambda (k v) (format t "~A => ~A~%" k v)) util:label-table) - (format t "---~%")) + (let ((ast (esrap:parse 'parse::str->ast (string-upcase str)))) + (when emit? + (format t "~a~%" (emit::emit ast)))) (error "The file does not exist, or it could not be opened.~%")) (format t "Nitimur in Vetitum~%")))))) diff --git a/src/package.lisp b/src/package.lisp index 115942e..bbb45a3 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -13,12 +13,15 @@ #:add-label #:get-variable #:get-label - #:label-table #:r-type #:i-type - #:j-type - #:mnemonic-loc)) + #:j-type)) (defpackage #:parse (:use #:cl) - (:export #:str->ast)) + (:export #:str->ast + #:line-number)) + +(defpackage #:emit + (:use #:cl) + (:export #:emit)) diff --git a/src/parse.lisp b/src/parse.lisp index d07fbde..d971444 100644 --- a/src/parse.lisp +++ b/src/parse.lisp @@ -44,25 +44,25 @@ (esrap:defrule register (and #\$ int) (:function cadr) - (:lambda (e) (list 'rr e))) + (:lambda (e) (list 'emit::rr e))) (esrap:defrule var alpha - (:lambda (e) (list (list 'rr 0) (list 'var e)))) + (:lambda (e) (list (list 'emit::rr 0) (list 'emit::var e)))) (esrap:defrule dereference (and (esrap:? (or #\+ #\-)) int #\( register #\)) (:destructure (s i1 w1 r w2) (declare (ignore w1 w2)) - (list r (list 'imm (if (and s (string= s "-")) (- i1) i1))))) + (list r (list 'emit::imm (if (and s (string= s "-")) (- i1) i1))))) (esrap:defrule immediate int - (:lambda (e) (list 'imm e))) + (:lambda (e) (list 'emit::imm e))) ;;; defines rules to parse labels (esrap:defrule label alpha - (:lambda (e) (list 'l e))) + (:lambda (e) (list 'emit::l e line-number))) -(esrap:defrule label-decl (and label #\:) +(esrap:defrule label-decl (and alpha #\:) (:function car) (:lambda (e) (util:add-label e line-number) @@ -102,33 +102,33 @@ DESTRUCTURE-PATTERN is the list of non-terminals on the right side of the gramma (and ,(read-from-string (format nil "~A-m" name)) ,@(util:riffle (make-list pattern-size :initial-element 'space) destructure-pattern)) (:destructure (m ,@(util:riffle spaces vars)) (declare (ignore ,@spaces)) - (list ,type-id m ,@(mapcar (lambda (x) (or (nth x vars) ''(rr 0))) order)))))) + (list ,type-id m ,@(mapcar (lambda (x) (or (nth x vars) ''(emit::rr 0))) order)))))) -(defrule-instr r-type-1 'r (1 2 0) register register) -(defrule-instr r-type-2 'r (0 1 2) register register) -(defrule-instr r-type-3 'r (1 2 0) register register register) -(defrule-instr i-type-3 'i (0 1 2) register register immediate) -(defrule-instr j-type-3 'j (1 0) label) +(defrule-instr r-type-1 'emit::r (1 2 0) register register) +(defrule-instr r-type-2 'emit::r (0 1 2) register register) +(defrule-instr r-type-3 'emit::r (1 2 0) register register register) +(defrule-instr i-type-3 'emit::i (0 1 2) register register immediate) +(defrule-instr j-type-3 'emit::j (1 0) label) (esrap:defrule i-type-1 (and i-type-1-m space register space (or dereference var)) (:destructure (m w1 s w2 di) (declare (ignore w1 w2)) - `(i ,m ,s ,@di))) + `(emit::i ,m ,s ,@di))) (esrap:defrule i-type-2 (and i-type-2-m space register space (or dereference var)) (:destructure (m w1 s w2 di) (declare (ignore w1 w2)) - `(i ,m ,@(util:insert-in-middle di s)))) + `(emit::i ,m ,@(util:insert-in-middle di s)))) (esrap:defrule j-type-1 (and j-type-1-m space dereference) (:destructure (m w di) (declare (ignore w)) - `(j ,m ,@di))) + `(emit::j ,m ,@di))) (esrap:defrule j-type-2 (and j-type-2-m space register) (:destructure (m w r) (declare (ignore w)) - `(j ,m ,r (imm 0)))) + `(emit::j ,m ,r (emit::imm 0)))) (esrap:defrule instr (or r-type-1 r-type-2 r-type-3 i-type-1 i-type-2 i-type-3 j-type-1 j-type-2 j-type-3)) @@ -145,7 +145,7 @@ DESTRUCTURE-PATTERN is the list of non-terminals on the right side of the gramma (esrap:defrule text (and ".TEXT" (esrap:? space) nl (* text-line)) (:function cadddr) - (:lambda (e) `(x ,@(remove nil e)))) + (:lambda (e) `(emit::x ,@(remove nil e)))) ;;; defines rules to parse the .data segment @@ -165,10 +165,10 @@ DESTRUCTURE-PATTERN is the list of non-terminals on the right side of the gramma (esrap:defrule data (and ".DATA" (esrap:? space) nl (* data-line)) (:function cadddr) - (:lambda (e) `(d ,@(apply #'append e)))) + (:lambda (e) `(emit::d ,@(apply #'append e)))) ;;; defines rules to parse a program -(esrap:defrule str->ast (and (* (or space nl)) (* (or data text))) - (:function cadr) - (:lambda (e) `(p ,@e))) +(esrap:defrule str->ast (and (* (or space nl)) data text) + (:function cdr) + (:lambda (e) `(emit::p ,@e))) diff --git a/src/util.lisp b/src/util.lisp index d8bd227..f3035fe 100644 --- a/src/util.lisp +++ b/src/util.lisp @@ -4,15 +4,13 @@ "Returns t if FILE is extended with .asm, nil otherwise." (string= (pathname-type file) "asm")) -(defun fits-in-X-bits (n) - "Returns the number of bits required to represent N" - (ceiling (/ (log (ceiling n (log 2))) (log 2)))) (defun format-as-binary (num len) "Formats NUM as a binary number, and pads to LEN with zeros." (declare (type number num)) (declare (type (integer 0 *) len)) - (format nil "~V,'0b" len num)) + (let ((max-val (1- (expt 2 len)))) + (format nil "~V,'0b" len (logand num max-val)))) (defun insert-in-middle (list element) (append (list (car list)) (list element) (cdr list))) @@ -33,8 +31,8 @@ of the elements from both lists. Returns nil if the lists are not equal size." for l2 in lst2 append (list l1 l2)))) -(defvar variable-table (make-hash-table :test #'equal)) -(defvar label-table (make-hash-table :test #'equal)) +(defvar variable-table (make-hash-table :test #'equalp)) +(defvar label-table (make-hash-table :test #'equalp)) (defun add-variable (name value) (if (gethash name variable-table) @@ -49,25 +47,14 @@ of the elements from both lists. Returns nil if the lists are not equal size." (defun get-variable (name) (alexandria:if-let ((value (gethash name variable-table))) value - (error "~@" name))) + (progn (maphash #'(lambda (k v) (format t "~A => ~A~%" k v)) variable-table) + (error "~@" name)))) (defun get-label (name) (alexandria:if-let ((value (gethash name label-table))) value (error "~@