From 8ceaf87c43d412b758bb7562364f7ae8bee913ae Mon Sep 17 00:00:00 2001 From: bd Date: Tue, 8 Apr 2025 21:41:35 -0400 Subject: Add parsing functionality for R-type expressions --- src/main.lisp | 8 ++++++- src/package.lisp | 6 +++-- src/parse.lisp | 73 +++++++++++++++++++++++++++++++++++++++++++++----------- src/util.lisp | 5 +--- t/parse.lisp | 58 ++++++++++++++++++++++++++++++++++++++++---- 5 files changed, 125 insertions(+), 25 deletions(-) diff --git a/src/main.lisp b/src/main.lisp index f6e5754..8a36a66 100644 --- a/src/main.lisp +++ b/src/main.lisp @@ -47,8 +47,14 @@ _/_/ _/_/ " (t (let ((tokens (lex:file->tokens file))) (if tokens (progn (pprint tokens) - (terpri)) + (terpri) + (format t "---~%")) (error "The file does not exist, or it could not be opened.~%")) + (if parse? + (let ((tokens (parse:tokens->ast tokens))) + (progn (pprint tokens) + (terpri) + (format t "---~%")))) (format t "Nitimur in Vetitum~%")))))) diff --git a/src/package.lisp b/src/package.lisp index 3364856..7a14bd2 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -1,4 +1,4 @@ -helper(defpackage #:rva +(defpackage #:rva (:use #:cl) (:export #:main)) @@ -22,5 +22,7 @@ helper(defpackage #:rva (:use #:cl) (:export #:parser-error #:tokens->ast + #:to-register ;; exported for testing only - #:extract-label)) + #:extract-label + #:extract-r-type)) diff --git a/src/parse.lisp b/src/parse.lisp index 3052583..bd50a23 100644 --- a/src/parse.lisp +++ b/src/parse.lisp @@ -1,4 +1,14 @@ -helper(in-package #:parse) +(in-package #:parse) + +(defparameter reg-loc + '(("zr" . 0) ("lk" . 1) + ("sp" . 2) ("cc" . 3) + ("vl" . 4) ("fp" . 5)) + "A symbol table mapping register aliases to identifiers. If you want to add +a new alias, do it here.") + +(defparameter label-loc '() + "A symbol table mapping label names to line indices.") (define-condition parser-error (error) ((message :initarg :message @@ -8,6 +18,19 @@ helper(in-package #:parse) (format stream "~A" (message condition)))) (:documentation "Dedicated error for an invalid parse.")) +(defun to-register (tokens) + "Attempts to parse a register from the start of TOKENS. If it is badly formed, +throws a PARSER-ERROR." + (or (and (equal (car tokens) 'LEX::DOLLAR) + (cadr tokens) + (let ((r (cadr tokens))) + (cond ((stringp r) (cdr (assoc r reg-loc :test #'string=))) + ((numberp r) (and (<= 0 r 23) r))))) + (error 'parser-error + :message + (format nil "PARSE failed--Expected register, got ~a.~%" + (subseq tokens 0 (min 2 (length tokens))))))) + (defun tokens->ast (program) "Given PROGRAM, which is a list of lists of symbols, filters out the labels and parses." @@ -26,30 +49,52 @@ Note that this function is intended to be called using mapcar, so that labels can be added to a map and otherwise removed from processing." (trivia:match line - ((list (and id (type string)) - (satisfies (lambda (x) (equal x 'lex::colon)))) - (progn (push (cons (read-from-string id) i) util:label-loc) nil)) + ((list (type string) 'lex::colon) + (progn (push (cons (read-from-string (car line)) i) label-loc) nil)) (_ (progn (incf i) line))))) (defun extract-instruction (line i) "Given instruction LINE, determines the expected type format and passes LINE and the index I to the the respective function." - ;; TODO add pseudo-ops (i.e., nop, mov, ...) - (let* ((type-map '((r-type . extract-r-type) - (i-type . extract-i-type) - (j-type . extract-j-type))) - (keyword (car line)) - (type-fn (cdr (assoc keyword type-map)))) + (let* ((mnemonic (intern (string-upcase (car line)) :util)) + ;; TODO add pseudo-ops (i.e., nop, leave, ret...) + ;; should probably be their own extract function + (type-fn (cond + ((member mnemonic util:type-r) #'extract-r-type) + ((member mnemonic util:type-i) #'extract-i-type) + ((member mnemonic util:type-j) #'extract-j-type)))) (if type-fn (funcall type-fn line i) (error 'parser-error - (format nil "PARSE failed--~a is not a known keyword.~%" (keyword)))))) + :message + (format nil "PARSE failed--~a is not a known keyword.~%" mnemonic))))) (defun extract-r-type (line i) - 'r) + (let ((mnemonic (intern (string-upcase (car line)) :util))) + (defun die () + (error 'parser-error + :message + (format nil "PARSE failed---Incorrect number of operands for ~a" mnemonic))) + (defun eat-registers (registers-so-far lst) + (if (not (null lst)) + (eat-registers (cons (to-register lst) registers-so-far) + (cddr lst)) + (reverse registers-so-far))) + (let* ((registers (eat-registers '() (cdr line))) + ;; handle special cases + (registers (cond ((member mnemonic '(util::CMP util::CEV)) + (if (= 2 (length registers)) + (cons 0 registers) + (die))) + ((member mnemonic '(util::NOT)) + (if (= 2 (length registers)) + (append registers (list 0)) + (die))) + (t (if (= 3 (length registers)) registers (die)))))) + (list :op mnemonic :d (car registers) :s1 (cadr registers) :s2 (caddr registers))))) (defun extract-i-type (line i) - 'i) + line) (defun extract-j-type (line i) - 'j) + line) diff --git a/src/util.lisp b/src/util.lisp index 5edee4a..027a770 100644 --- a/src/util.lisp +++ b/src/util.lisp @@ -16,12 +16,9 @@ "R-type instructions.") (defparameter type-i - '(LOAD LOADV ADDI SUBI SFTRI SFTLI ANDI ORI XORI STORE STOREV) + '(LOAD LOADV ADDI SUBI SFTRI SFTLI ANDI ORI XORI STORE STOREV MOV) "I-type instructions.") (defparameter type-j '(JMP JRL JAL BEQ BGT BUF BOF PUSH POP) "J-type instructions.") - -(defparameter label-loc '() - "A symbol table mapping label names to line indices.") diff --git a/t/parse.lisp b/t/parse.lisp index bd1310f..7187292 100644 --- a/t/parse.lisp +++ b/t/parse.lisp @@ -1,5 +1,11 @@ (in-package #:rva-tests) +(defmacro expect-parse-error (body) + `(handler-case + (progn ,body + (fail)) + (parse::parser-error ()))) + (def-suite parse-tests :description "Test functions exported from the parser." :in all-tests) @@ -20,7 +26,51 @@ (parse:extract-label lst))))) (test extract-line-invalid-type - (handler-case - (progn (parse:tokens->ast '(("foo" LEX::DOLLAR))) - (fail)) - (lex:parser-error ()))) + (expect-parse-error (parse:tokens->ast '(("foo" lex::dollar))))) + +(test to-register-nil + (expect-parse-error (parse:to-register '()))) + +(test to-register-singleton + (expect-parse-error (parse:to-register '(lex::dollar)))) + +(test to-register-zero + (is (= 0 (parse:to-register '(lex::dollar 0))))) + +(test to-register-one + (is (= 1 (parse:to-register '(lex::dollar 1))))) + +(test to-register-twenty-three + (is (= 23 (parse:to-register '(lex::dollar 23))))) + +(test to-register-zero-named + (is (= 0 (parse:to-register '(lex::dollar "zr"))))) + +(test to-register-twenty-four + (expect-parse-error (parse:to-register '(lex::dollar 24)))) + +(test to-register-negative-one + (expect-parse-error (parse:to-register '(lex::dollar -1)))) + +(test extract-r-type-no-registers + (expect-parse-error (parse:extract-r-type '("add") 0))) + +(test extract-r-type-two-registers + (expect-parse-error (parse:extract-r-type '("add" lex::dollar 2 lex::dollar 3) 0))) + +(test extract-r-type-cmp-three-registers + (expect-parse-error (parse:extract-r-type '("cmp" lex::dollar 2 + lex::dollar 3 lex::dollar 4) 0))) + +(test extract-r-type-simple-add + (is (equal '(:op util::ADD :d 2 :s1 3 :s2 4) + (parse:extract-r-type '("add" lex::dollar 2 + lex::dollar 3 lex::dollar 4) 0)))) + +(test extract-r-type-simple-not + (is (equal '(:op util::NOT :d 2 :s1 3 :s2 0) + (parse:extract-r-type '("not" lex::dollar 2 lex::dollar 3) 0)))) + +(test extract-r-type-simple-cmp + (is (equal '(:op util::CMP :d 0 :s1 2 :s2 3) + (parse:extract-r-type '("cmp" lex::dollar 2 lex::dollar 3) 0)))) -- cgit v1.2.3 From 4df084d3e2785412eb086fb02ac10be5def695d1 Mon Sep 17 00:00:00 2001 From: bd Date: Wed, 9 Apr 2025 09:28:22 -0400 Subject: Remove trivia --- README.md | 1 - rva.asd | 3 +-- src/parse.lisp | 9 +++++---- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index aa61af6..4f2631d 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,6 @@ A common-lisp implementation (SBCL) and the following libraries are required to - ASDF (tested with v3.3.7) - fiveam (tested with v3.3.7) - clingon (tested with v0.5.0-1.f2a730f) -- trivia (tested with v0.1-0.8b406c3) ## To run diff --git a/rva.asd b/rva.asd index 7fdb00b..83dc9fe 100644 --- a/rva.asd +++ b/rva.asd @@ -9,8 +9,7 @@ :description "Assembler for the RISC-V[ECTOR] mini-ISA." :source-control (:git "git@github.com:bdunahu/rva.git") :depends-on (:uiop - :clingon - :trivia) + :clingon) :components ((:module "src" :serial t :components ((:file "package") diff --git a/src/parse.lisp b/src/parse.lisp index bd50a23..37415c8 100644 --- a/src/parse.lisp +++ b/src/parse.lisp @@ -48,10 +48,11 @@ label, and pushed onto the stack with the line index. Note that this function is intended to be called using mapcar, so that labels can be added to a map and otherwise removed from processing." - (trivia:match line - ((list (type string) 'lex::colon) - (progn (push (cons (read-from-string (car line)) i) label-loc) nil)) - (_ (progn (incf i) line))))) + (if (and (equal 2 (length line)) + (stringp (car line)) + (equal 'lex::colon (cadr line))) + (progn (push (cons (read-from-string (car line)) i) label-loc) nil) + (progn (incf i) line)))) (defun extract-instruction (line i) "Given instruction LINE, determines the expected type format and passes -- cgit v1.2.3 From 6ba1871c3825e17d33b96ffd3051239dfe18d61a Mon Sep 17 00:00:00 2001 From: bd Date: Wed, 9 Apr 2025 19:26:51 -0400 Subject: Saving first part of large rewrite --- rva.asd | 3 +- src/lex.lisp | 2 + src/package.lisp | 15 ++-- src/parse.lisp | 204 ++++++++++++++++++++++++++++--------------------------- src/util.lisp | 19 ++++++ t/lex.lisp | 4 ++ t/parse.lisp | 96 +++++++++++--------------- 7 files changed, 178 insertions(+), 165 deletions(-) diff --git a/rva.asd b/rva.asd index 83dc9fe..dd904fb 100644 --- a/rva.asd +++ b/rva.asd @@ -9,7 +9,8 @@ :description "Assembler for the RISC-V[ECTOR] mini-ISA." :source-control (:git "git@github.com:bdunahu/rva.git") :depends-on (:uiop - :clingon) + :clingon + :esrap) :components ((:module "src" :serial t :components ((:file "package") diff --git a/src/lex.lisp b/src/lex.lisp index 5b1457d..c86d17c 100644 --- a/src/lex.lisp +++ b/src/lex.lisp @@ -42,6 +42,8 @@ Comments start with a semi-colon ';' and all tokens after are ignored." (progn (read-line *standard-input* nil) 'nl)) + ((char= chr #\.) 'period) + ((char= chr #\() 'left-paren) ((char= chr #\)) 'right-paren) diff --git a/src/package.lisp b/src/package.lisp index 7a14bd2..b856e38 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -6,10 +6,9 @@ (:use #:cl) (:export #:asm-extension? #:format-as-binary - #:type-r - #:type-i - #:type-j - #:label-loc)) + #:insert-in-middle + #:iota + #:riffle)) (defpackage #:lex (:use #:cl) @@ -22,7 +21,7 @@ (:use #:cl) (:export #:parser-error #:tokens->ast - #:to-register - ;; exported for testing only - #:extract-label - #:extract-r-type)) + ;; exported for testing only + #:register + #:instr + )) diff --git a/src/parse.lisp b/src/parse.lisp index 37415c8..54b0709 100644 --- a/src/parse.lisp +++ b/src/parse.lisp @@ -1,101 +1,107 @@ (in-package #:parse) -(defparameter reg-loc - '(("zr" . 0) ("lk" . 1) - ("sp" . 2) ("cc" . 3) - ("vl" . 4) ("fp" . 5)) - "A symbol table mapping register aliases to identifiers. If you want to add -a new alias, do it here.") - -(defparameter label-loc '() - "A symbol table mapping label names to line indices.") - -(define-condition parser-error (error) - ((message :initarg :message - :initform nil - :reader message)) - (:report (lambda (condition stream) - (format stream "~A" (message condition)))) - (:documentation "Dedicated error for an invalid parse.")) - -(defun to-register (tokens) - "Attempts to parse a register from the start of TOKENS. If it is badly formed, -throws a PARSER-ERROR." - (or (and (equal (car tokens) 'LEX::DOLLAR) - (cadr tokens) - (let ((r (cadr tokens))) - (cond ((stringp r) (cdr (assoc r reg-loc :test #'string=))) - ((numberp r) (and (<= 0 r 23) r))))) - (error 'parser-error - :message - (format nil "PARSE failed--Expected register, got ~a.~%" - (subseq tokens 0 (min 2 (length tokens))))))) - -(defun tokens->ast (program) - "Given PROGRAM, which is a list of lists of symbols, -filters out the labels and parses." - ;; TODO add directives - (let ((program (remove nil (mapcar #'extract-label program))) - (i 0)) - (mapcar (lambda (l) (extract-instruction l i)) program))) - -(let ((i 0)) - (defun extract-label (line) - "Given a series of tokens LINE, determines if LINE is -in the form STRING {colon}. If it is, then it is treated as a -label, and pushed onto the stack with the line index. - -Note that this function is intended to be called using mapcar, -so that labels can be added to a map and otherwise removed from -processing." - (if (and (equal 2 (length line)) - (stringp (car line)) - (equal 'lex::colon (cadr line))) - (progn (push (cons (read-from-string (car line)) i) label-loc) nil) - (progn (incf i) line)))) - -(defun extract-instruction (line i) - "Given instruction LINE, determines the expected type format and passes -LINE and the index I to the the respective function." - (let* ((mnemonic (intern (string-upcase (car line)) :util)) - ;; TODO add pseudo-ops (i.e., nop, leave, ret...) - ;; should probably be their own extract function - (type-fn (cond - ((member mnemonic util:type-r) #'extract-r-type) - ((member mnemonic util:type-i) #'extract-i-type) - ((member mnemonic util:type-j) #'extract-j-type)))) - (if type-fn - (funcall type-fn line i) - (error 'parser-error - :message - (format nil "PARSE failed--~a is not a known keyword.~%" mnemonic))))) - -(defun extract-r-type (line i) - (let ((mnemonic (intern (string-upcase (car line)) :util))) - (defun die () - (error 'parser-error - :message - (format nil "PARSE failed---Incorrect number of operands for ~a" mnemonic))) - (defun eat-registers (registers-so-far lst) - (if (not (null lst)) - (eat-registers (cons (to-register lst) registers-so-far) - (cddr lst)) - (reverse registers-so-far))) - (let* ((registers (eat-registers '() (cdr line))) - ;; handle special cases - (registers (cond ((member mnemonic '(util::CMP util::CEV)) - (if (= 2 (length registers)) - (cons 0 registers) - (die))) - ((member mnemonic '(util::NOT)) - (if (= 2 (length registers)) - (append registers (list 0)) - (die))) - (t (if (= 3 (length registers)) registers (die)))))) - (list :op mnemonic :d (car registers) :s1 (cadr registers) :s2 (caddr registers))))) - -(defun extract-i-type (line i) - line) - -(defun extract-j-type (line i) - line) +(esrap:defrule space + (+ (or #\Space #\Tab)) + (:constant nil)) + +;;; defines rules to parse an integer in various bases + +(esrap:defrule binary (and #\0 #\b (+ (or "0" "1"))) + (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 2))) + +(esrap:defrule octal (and #\0 #\o (+ (or (esrap:character-ranges (#\0 #\7))))) + (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 8))) + +(esrap:defrule decimal (+ (or (esrap:character-ranges (#\0 #\9)))) + (:lambda (list) (parse-integer (esrap:text list) :radix 10))) + +(esrap:defrule hex (and #\0 #\x (+ (or (esrap:character-ranges (#\0 #\9)) + "a" "b" "c" "d" "e" "f" + "A" "B" "C" "D" "E" "F"))) + (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 16))) + +(esrap:defrule int (or binary octal hex decimal)) + +;;; defines rules to parse an operand + +(esrap:defrule register (and #\$ (or int reg-id)) + (:lambda (list) (list 'rr (cadr list)))) + +(esrap:defrule dereference (and (esrap:? (or #\+ #\-)) int #\( register #\)) + (:destructure (s i1 w1 r w2) + (declare (ignore w1 w2)) + (list r (if (and s (string= s "-")) (- i1) i1)))) + +;;; defines rules to parse labels + +(esrap:defrule label (+ (alphanumericp character)) + (:lambda (list) (list 'l (esrap:text list)))) + +(esrap:defrule label-decl (and label #\:) + (:destructure (l w) + (declare (ignore w)) + l)) + +;;; defines rules to parse instruction types + +(esrap:defrule r-type-1-m (or "ADDV" "SUBV" "MULV" "DIVV" "ADD" "SUB" "MUL" + "QUOT" "REM" "SFTR" "SFTL" "AND" "OR" "NOT" "XOR" )) +(esrap:defrule r-type-2-m "NOT") +(esrap:defrule r-type-3-m (or "CMP" "CEV")) +(esrap:defrule i-type-1-m (or "LOADV" "LOAD")) +(esrap:defrule i-type-2-m (or "STOREV" "STORE")) +(esrap:defrule i-type-3-m (or "ADDI" "SUBI" "SFTRI" "SFTLI" "ANDI" "ORI" "XORI")) +(esrap:defrule j-type-1-m (or "JMP" "JAL")) +(esrap:defrule j-type-2-m (or "JRL" "BEQ" "BGT" "BUF" "BOF")) +(esrap:defrule j-type-3-m (or "PUSH" "POP")) + +(defmacro defrule-instr (name type-id order &rest destructure-pattern) + (let* ((pattern-size (length destructure-pattern)) + (spaces (mapcar (lambda (x) (read-from-string (format nil "w~A" x))) (util:iota pattern-size))) + (vars (mapcar (lambda (x) (read-from-string (format nil "s~A" x))) (util:iota pattern-size)))) + `(esrap:defrule ,name + (and ,(read-from-string (format nil "~A-m" name)) ,@(util:riffle (make-list pattern-size :initial-element 'space) destructure-pattern)) + (:destructure (m ,@(util:riffle spaces vars)) + (declare (ignore ,@spaces)) + (list ,type-id m ,@(mapcar (lambda (x) (or (nth x vars) ''(rr 0))) order)))))) + +(defrule-instr r-type-1 'r (1 2 0) register register register) +(defrule-instr r-type-2 'r (1 2 0) register register) +(defrule-instr r-type-3 'r (0 1 2) register register) +;;(defrule-instr j-type-1 'j (0 1) dereference) +(defrule-instr j-type-2 'j (1 0) label) +(defrule-instr j-type-3 'j (0 1) register) + +;; (esrap:defrule i-type-1 (and i-type-1-m space register space dereference) +;; (:destructure (m w1 s w2 di) +;; (declare (ignore w1 w2)) +;; `(i ,m ,s ,@di))) +;; (esrap:defrule i-type-2 (and i-type-2-m space register space dereference) +;; (:destructure (m w1 s w2 di) +;; (declare (ignore w1 w2)) +;; `(i ,m ,@(util:insert-in-middle di s)))) +;; (esrap:defrule i-type-3 (and i-type-3-m space register space register space int) +;; (:destructure (m w1 s1 w2 s2 w3 i) +;; (declare (ignore w1 w2 w3)) +;; (list i m s1 s2 i))) +;; (esrap:defrule j-type-1 (and j-type-1-m space dereference) +;; (:destructure (m w di) +;; (declare (ignore w)) +;; `(j ,m ,@di))) +;; (esrap:defrule j-type-2 (and j-type-2-m space label) +;; (:destructure (m w label) +;; (declare (ignore w)) +;; (list j m "00000" label))) +;; (esrap:defrule j-type-3 (and j-type-3-m space register) +;; (:destructure (m w r) +;; (declare (ignore w)) +;; (list j m r "00000"))) + +(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 j-type-2 j-type-3)) + + +;; (esrap:parse 'register "$3") + +;; (esrap:parse 'j-type-1 "JMP 3($3)") +;; (esrap:parse 'j-type-2 "JRL FOO") +;; (esrap:parse 'j-type-3 "PUSH $1") diff --git a/src/util.lisp b/src/util.lisp index 027a770..93db659 100644 --- a/src/util.lisp +++ b/src/util.lisp @@ -11,6 +11,25 @@ (declare (type (integer 0 *) len)) (format nil "~V,'0b" len num)) +(defun insert-in-middle (list element) + (append (list (car list)) (list element) (cdr list))) + +(defun iota (n) + "Generates a number sequence from 0 to N." + (when (> n 0) + (do ((i 0 (1+ i)) + (item 0 (1+ item)) + (result nil (push item result))) + ((= i n) (nreverse result))))) + +(defun riffle (lst1 lst2) + "Given LST1 and LST2, returns a new list which is the an alternative sequence +of the elements from both lists. Returns nil if the lists are not equal size." + (when (eq (length lst1) (length lst2)) + (loop for l1 in lst1 + for l2 in lst2 + append (list l1 l2)))) + (defparameter type-r '(ADD SUB MUL QUOT REM SFTR SFTL AND OR NOT XOR ADDV SUBV MULV DIVV CMP CEV) "R-type instructions.") diff --git a/t/lex.lisp b/t/lex.lisp index 7a20608..98d7c28 100644 --- a/t/lex.lisp +++ b/t/lex.lisp @@ -19,6 +19,10 @@ " (is (eq (lex:read-token) 'lex::nl)))) +(test read-token-reads-left-paren + (read-this "." + (is (eq (lex:read-token) 'lex::period)))) + (test read-token-reads-left-paren (read-this "(" (is (eq (lex:read-token) 'lex::left-paren)))) diff --git a/t/parse.lisp b/t/parse.lisp index 7187292..77db380 100644 --- a/t/parse.lisp +++ b/t/parse.lisp @@ -1,76 +1,58 @@ (in-package #:rva-tests) -(defmacro expect-parse-error (body) - `(handler-case - (progn ,body - (fail)) - (parse::parser-error ()))) - (def-suite parse-tests :description "Test functions exported from the parser." :in all-tests) -(in-suite parse-tests) - -(test extract-label-is-a-label - (is (not (parse:extract-label '("LOOP" lex::colon))))) - -(test extract-label-not-a-label-one - (let ((lst '("NICE" "TRY"))) - (is (equal lst - (parse:extract-label lst))))) +;;; these tests are not exhaustive, and are meant to test basic functionality +;;; under correct circumstances. -(test extract-label-not-a-label-two - (let ((lst '("LOOP" lex::colon lex::colon))) - (is (equal lst - (parse:extract-label lst))))) - -(test extract-line-invalid-type - (expect-parse-error (parse:tokens->ast '(("foo" lex::dollar))))) - -(test to-register-nil - (expect-parse-error (parse:to-register '()))) - -(test to-register-singleton - (expect-parse-error (parse:to-register '(lex::dollar)))) +(in-suite parse-tests) -(test to-register-zero - (is (= 0 (parse:to-register '(lex::dollar 0))))) +(test esrap-register-decimal-ten + (is (equal (list 'parse::rr 10) + (esrap:parse 'parse::register "$10")))) -(test to-register-one - (is (= 1 (parse:to-register '(lex::dollar 1))))) +(test esrap-register-binary-ten + (is (equal (list 'parse::rr 10) + (esrap:parse 'parse::register "$0b1010")))) -(test to-register-twenty-three - (is (= 23 (parse:to-register '(lex::dollar 23))))) +(test esrap-register-octal-ten + (is (equal (list 'parse::rr 10) + (esrap:parse 'parse::register "$0o12")))) -(test to-register-zero-named - (is (= 0 (parse:to-register '(lex::dollar "zr"))))) +(test esrap-register-hex-ten + (is (equal (list 'parse::rr 10) + (esrap:parse 'parse::register "$0xa")))) -(test to-register-twenty-four - (expect-parse-error (parse:to-register '(lex::dollar 24)))) +(test esrap-r-type-1 + (is (equal '(parse::r "ADD" (parse::rr 5) (parse::rr 8) (parse::rr 1)) + (esrap:parse 'parse:instr "ADD $1 $5 $8")))) -(test to-register-negative-one - (expect-parse-error (parse:to-register '(lex::dollar -1)))) +(test esrap-r-type-2 + (is (equal '(parse::r "NOT" (parse::rr 5) (parse::rr 0) (parse::rr 1)) + (esrap:parse 'parse:instr "NOT $1 $5")))) -(test extract-r-type-no-registers - (expect-parse-error (parse:extract-r-type '("add") 0))) +(test esrap-r-type-3 + (is (equal '(parse::r "CMP" (parse::rr 1) (parse::rr 5) (parse::rr 0)) + (esrap:parse 'parse:instr "CMP $1 $5")))) -(test extract-r-type-two-registers - (expect-parse-error (parse:extract-r-type '("add" lex::dollar 2 lex::dollar 3) 0))) +;; (test esrap-i-type-1 +;; (is (equal (list 'parse::i "LOAD" (list 'parse::rr 8) (list 'parse::rr 9) (list 'parse::r 1)) +;; (esrap:parse 'parse:instr "LOAD $8 1($9)")))) -(test extract-r-type-cmp-three-registers - (expect-parse-error (parse:extract-r-type '("cmp" lex::dollar 2 - lex::dollar 3 lex::dollar 4) 0))) +;; (test esrap-i-type-2 +;; (is (equal (list 'parse::i "STORE" (list 'parse::rr 3) (list 'parse::rr 5) (list 'parse::rr 3)) +;; (esrap:parse 'parse:instr "STORE $5 3($3)")))) -(test extract-r-type-simple-add - (is (equal '(:op util::ADD :d 2 :s1 3 :s2 4) - (parse:extract-r-type '("add" lex::dollar 2 - lex::dollar 3 lex::dollar 4) 0)))) +;; (test esrap-i-type-3 +;; (is (equal (list 'parse::i "ORI" (list 'parse::rr 5) (list 'parse::rr 4) (list 'parse::r 2)) +;; (esrap:parse 'parse:instr "ORI $5 $4 2")))) -(test extract-r-type-simple-not - (is (equal '(:op util::NOT :d 2 :s1 3 :s2 0) - (parse:extract-r-type '("not" lex::dollar 2 lex::dollar 3) 0)))) +(test esrap-j-type-2 + (is (equal '(parse::j "JRL" (parse::rr 0) (parse::l "FOO")) + (esrap:parse 'parse:instr "JRL FOO")))) -(test extract-r-type-simple-cmp - (is (equal '(:op util::CMP :d 0 :s1 2 :s2 3) - (parse:extract-r-type '("cmp" lex::dollar 2 lex::dollar 3) 0)))) +(test esrap-j-type-3 + (is (equal '(parse::j "PUSH" (parse::rr 1) 0) + (esrap:parse 'parse:instr "PUSH $1")))) -- cgit v1.2.3 From ce80845b5cb5b473327e4e561342490576d83a90 Mon Sep 17 00:00:00 2001 From: bd Date: Wed, 9 Apr 2025 20:25:57 -0400 Subject: Successfully parse every instruction type --- input/add-loop.asm | 13 ++++--- rva.asd | 4 +- src/lex.lisp | 110 ----------------------------------------------------- src/main.lisp | 21 ++-------- src/package.lisp | 12 +----- src/parse.lisp | 95 +++++++++++++++++++++++++-------------------- t/lex.lisp | 104 -------------------------------------------------- t/parse.lisp | 24 +++++++----- 8 files changed, 81 insertions(+), 302 deletions(-) delete mode 100644 src/lex.lisp delete mode 100644 t/lex.lisp diff --git a/input/add-loop.asm b/input/add-loop.asm index 6379831..f8d97a1 100644 --- a/input/add-loop.asm +++ b/input/add-loop.asm @@ -1,17 +1,18 @@ - addi $fp $0 0x200 +.text + addi $2 $0 0x200 addi $5 $0 0x1 - store $5 0($fp) + store $5 0($2) addi $5 $0 0x2 - store $5 1($fp) + store $5 1($2) addi $5 $0 0x3 - store $5 2($fp) + store $5 2($2) addi $5 $0 0x4 - store $5 3($fp) + store $5 3($2) addi $5 $0 0x0 addi $6 $0 0x3 jrl CHECK LOOP: - add $9 $fp $5 + add $9 $2 $5 load $7 -0($9) load $8 +1($9) add $7 $7 $8 diff --git a/rva.asd b/rva.asd index dd904fb..9271472 100644 --- a/rva.asd +++ b/rva.asd @@ -15,8 +15,7 @@ :serial t :components ((:file "package") (:file "util") - (:file "lex") - (:file "parse") + (:file "parse") (:file "main")))) :long-description #.(uiop:read-file-string @@ -37,7 +36,6 @@ :components ((:file "package") (:file "main") (:file "util") - (:file "lex") (:file "parse")))) :perform (test-op (o s) (uiop:symbol-call :rva-tests :test-rva))) diff --git a/src/lex.lisp b/src/lex.lisp deleted file mode 100644 index c86d17c..0000000 --- a/src/lex.lisp +++ /dev/null @@ -1,110 +0,0 @@ -(in-package #:lex) - -(define-condition lexer-error (error) - ((message :initarg :message - :initform nil - :reader message)) - (:report (lambda (condition stream) - (format stream "~A" (message condition)))) - (:documentation "Dedicated error for an invalid lex.")) - -(defun file->tokens (file) - "Opens FILE and parses returns a list of tokens, or -NIL if the file could not be opened." - - (defun read-instr (lst tokens-so-far) - "Collects tokens in FILE into TOKENS-SO-FAR, splitting on a newline." - (let ((token (read-token))) - (cond ((null token) (reverse tokens-so-far)) - ((eq token 'nl) - (cons (reverse tokens-so-far) (read-instr nil nil))) - (t (read-instr lst (cons token tokens-so-far)))))) - - (and (probe-file file) - (with-open-file (*standard-input* file :direction :input) - (remove nil (read-instr '() '()))))) - -(defun read-token () - "Reads *STANDARD-INPUT* and returns a token, or nil if the end -of file has been reached. -Whitespace, commas, colons, and parentheses are token delimiters. -Comments start with a semi-colon ';' and all tokens after are ignored." - (let ((chr (read-char *standard-input* nil))) - (cond - ((null chr) chr) - - ((char= chr #\linefeed) 'nl) - - ((whitespace-char-p chr) - (read-token)) - - ((char= chr #\;) - (progn (read-line *standard-input* nil) - 'nl)) - - ((char= chr #\.) 'period) - - ((char= chr #\() 'left-paren) - ((char= chr #\)) 'right-paren) - - ((char= chr #\:) 'colon) - ((char= chr #\$) 'dollar) - - ((char= chr #\+) 'plus) - ((char= chr #\-) 'minus) - - ((digit-char-p chr) - (read-immediate chr)) - - ((alpha-char-p chr) - (read-keyword chr)) - - (t (error 'lexer-error - :message - (format nil "LEX failled--~a is not a valid lexical symbol.~%" chr)))))) - -(defun read-immediate (chr) - "Reads a sequence of digits, in base 2, 8, 10, or 16.. Throws -`invalid-immediate-or-keyword' error if an alphabetic character is encountered." - ;; may be combined with read-keyword-helper - (defun read-immediate-helper (chrs-so-far) - (let ((chr (peek-char nil *standard-input* nil))) - (cond ((and (not (null chr)) (digit-char-p chr)) - (read-immediate-helper (cons (read-char *standard-input* nil) chrs-so-far))) - ((and (not (null chr)) (alpha-char-p chr)) - (error 'lexer-error - :message - (format nil "LEX failed--encountered ~a while reading immediate.~%" chr))) - (t (reverse chrs-so-far))))) - - (let* ((next (peek-char nil *standard-input* nil)) - (radix (cond ((null next) 10) - ((char= next #\b) 2) - ((char= next #\o) 8) - ((char= next #\x) 16) - ((alpha-char-p next) nil) - (t 10))) - (arg (list chr))) - (when (and (char= chr #\0) radix (not (= radix 10))) - (read-char *standard-input* nil) - (setq arg '())) - (parse-integer (coerce (read-immediate-helper arg) 'string) :radix radix))) - -(defun read-keyword (chr) - "Reads a sequence of alphabetic characters. Throws `invalid-immediate-or-keyword' -error if a digit is encountered." - ;; may be combined with read-immediate-helper - (defun read-keyword-helper (chrs-so-far) - (let ((chr (peek-char nil *standard-input* nil))) - (cond ((and (not (null chr)) (alpha-char-p chr)) - (read-keyword-helper (cons (read-char *standard-input* nil) chrs-so-far))) - ((and (not (null chr)) (digit-char-p chr)) - (error 'lexer-error - :message - (format nil "LEX failed--encountered ~a while reading keyword.~%" chr))) - (t (reverse chrs-so-far))))) - (coerce (read-keyword-helper (list chr)) 'string)) - -(defun whitespace-char-p (x) - (or (char= #\space x) - (not (graphic-char-p x)))) diff --git a/src/main.lisp b/src/main.lisp index 8a36a66..9180d57 100644 --- a/src/main.lisp +++ b/src/main.lisp @@ -17,13 +17,6 @@ _/_/ _/_/ " (defun cli/options () "Returns options for the `rva' assembler." (list - (clingon:make-option - :flag - :description "run the lexer, but stop before parsing" - :long-name "lex" - :short-name #\l - :required nil - :key :lex) (clingon:make-option :flag :description "run the parser, but stop before emission" @@ -37,24 +30,18 @@ _/_/ _/_/ " (print-splash) (let* ((args (clingon:command-arguments cmd)) (file (car args)) - (parse? (not (clingon:getopt cmd :lex))) - (emit? (not (clingon:getopt cmd :parse)))) + (emit? (not (clingon:getopt cmd :parse)))) (cond ;; complain about num arguments ((/= (length args) 1) (error "Wrong number of arguments.~%")) ((not (util:asm-extension? file)) (error "The file is not an asm source code file.~%")) - (t (let ((tokens (lex:file->tokens file))) - (if tokens - (progn (pprint tokens) + (t (let ((str (uiop:read-file-string file))) + (if str + (progn (pprint (esrap:parse 'parse:text (string-upcase str))) (terpri) (format t "---~%")) (error "The file does not exist, or it could not be opened.~%")) - (if parse? - (let ((tokens (parse:tokens->ast tokens))) - (progn (pprint tokens) - (terpri) - (format t "---~%")))) (format t "Nitimur in Vetitum~%")))))) diff --git a/src/package.lisp b/src/package.lisp index b856e38..2b01d15 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -10,18 +10,10 @@ #:iota #:riffle)) -(defpackage #:lex - (:use #:cl) - (:export #:lexer-error - #:file->tokens - ;; exported for testing only - #:read-token)) - (defpackage #:parse (:use #:cl) - (:export #:parser-error - #:tokens->ast - ;; exported for testing only + (:export ;; exported for testing only + #:text #:register #:instr )) diff --git a/src/parse.lisp b/src/parse.lisp index 54b0709..a92eae7 100644 --- a/src/parse.lisp +++ b/src/parse.lisp @@ -1,22 +1,24 @@ (in-package #:parse) (esrap:defrule space - (+ (or #\Space #\Tab)) + (+ (or #\space #\tab)) (:constant nil)) +(esrap:defrule newline + (+ #\newline)) + ;;; defines rules to parse an integer in various bases -(esrap:defrule binary (and #\0 #\b (+ (or "0" "1"))) +(esrap:defrule binary (and #\0 #\B (+ (or "0" "1"))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 2))) -(esrap:defrule octal (and #\0 #\o (+ (or (esrap:character-ranges (#\0 #\7))))) +(esrap:defrule octal (and #\0 #\O (+ (or (esrap:character-ranges (#\0 #\7))))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 8))) (esrap:defrule decimal (+ (or (esrap:character-ranges (#\0 #\9)))) (:lambda (list) (parse-integer (esrap:text list) :radix 10))) -(esrap:defrule hex (and #\0 #\x (+ (or (esrap:character-ranges (#\0 #\9)) - "a" "b" "c" "d" "e" "f" +(esrap:defrule hex (and #\0 #\X (+ (or (esrap:character-ranges (#\0 #\9)) "A" "B" "C" "D" "E" "F"))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 16))) @@ -30,7 +32,10 @@ (esrap:defrule dereference (and (esrap:? (or #\+ #\-)) int #\( register #\)) (:destructure (s i1 w1 r w2) (declare (ignore w1 w2)) - (list r (if (and s (string= s "-")) (- i1) i1)))) + (list r (list 'imm (if (and s (string= s "-")) (- i1) i1))))) + +(esrap:defrule immediate int + (:lambda (i) (list 'imm i))) ;;; defines rules to parse labels @@ -56,6 +61,11 @@ (esrap:defrule j-type-3-m (or "PUSH" "POP")) (defmacro defrule-instr (name type-id order &rest destructure-pattern) + "Defines the boilerplate for a common esrap instruction rule. +NAME is the name of the non-terminal symbol. +TYPE-ID is the symbol which appears as the first element of a successful parse. +ORDER is the order to place the parsed tokens in the resulting list. +DESTRUCTURE-PATTERN is the list of non-terminals on the right side of the grammar rule." (let* ((pattern-size (length destructure-pattern)) (spaces (mapcar (lambda (x) (read-from-string (format nil "w~A" x))) (util:iota pattern-size))) (vars (mapcar (lambda (x) (read-from-string (format nil "s~A" x))) (util:iota pattern-size)))) @@ -68,40 +78,41 @@ (defrule-instr r-type-1 'r (1 2 0) register register register) (defrule-instr r-type-2 'r (1 2 0) register register) (defrule-instr r-type-3 'r (0 1 2) register register) -;;(defrule-instr j-type-1 'j (0 1) dereference) + +(esrap:defrule i-type-1 (and i-type-1-m space register space dereference) + (:destructure (m w1 s w2 di) + (declare (ignore w1 w2)) + `(i ,m ,s ,@di))) + +(esrap:defrule i-type-2 (and i-type-2-m space register space dereference) + (:destructure (m w1 s w2 di) + (declare (ignore w1 w2)) + `(i ,m ,@(util:insert-in-middle di s)))) + +(defrule-instr i-type-3 'i (0 1 2) register register immediate) +(esrap:defrule j-type-1 (and j-type-1-m space dereference) + (:destructure (m w di) + (declare (ignore w)) + `(j ,m ,@di))) + (defrule-instr j-type-2 'j (1 0) label) -(defrule-instr j-type-3 'j (0 1) register) - -;; (esrap:defrule i-type-1 (and i-type-1-m space register space dereference) -;; (:destructure (m w1 s w2 di) -;; (declare (ignore w1 w2)) -;; `(i ,m ,s ,@di))) -;; (esrap:defrule i-type-2 (and i-type-2-m space register space dereference) -;; (:destructure (m w1 s w2 di) -;; (declare (ignore w1 w2)) -;; `(i ,m ,@(util:insert-in-middle di s)))) -;; (esrap:defrule i-type-3 (and i-type-3-m space register space register space int) -;; (:destructure (m w1 s1 w2 s2 w3 i) -;; (declare (ignore w1 w2 w3)) -;; (list i m s1 s2 i))) -;; (esrap:defrule j-type-1 (and j-type-1-m space dereference) -;; (:destructure (m w di) -;; (declare (ignore w)) -;; `(j ,m ,@di))) -;; (esrap:defrule j-type-2 (and j-type-2-m space label) -;; (:destructure (m w label) -;; (declare (ignore w)) -;; (list j m "00000" label))) -;; (esrap:defrule j-type-3 (and j-type-3-m space register) -;; (:destructure (m w r) -;; (declare (ignore w)) -;; (list j m r "00000"))) - -(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 j-type-2 j-type-3)) - - -;; (esrap:parse 'register "$3") - -;; (esrap:parse 'j-type-1 "JMP 3($3)") -;; (esrap:parse 'j-type-2 "JRL FOO") -;; (esrap:parse 'j-type-3 "PUSH $1") +(esrap:defrule j-type-3 (and j-type-3-m space register) + (:destructure (m w r) + (declare (ignore w)) + `(j ,m ,r (imm 0)))) + +(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 i-type-1 i-type-2 + i-type-3 j-type-1 j-type-2 j-type-3 label-decl)) + +;;; defines rules to parse the .text segment + +(esrap:defrule instr-clean (and (esrap:? space) instr newline) + (:destructure (w1 i w2) + (declare (ignore nl l)) + i)) + +(esrap:defrule text (and ".TEXT" newline + (* instr-clean)) + (:destructure (txt nl is) + (declare (ignore txt nl)) + (list 'text is))) diff --git a/t/lex.lisp b/t/lex.lisp deleted file mode 100644 index 98d7c28..0000000 --- a/t/lex.lisp +++ /dev/null @@ -1,104 +0,0 @@ -(in-package #:rva-tests) - -(defmacro read-this (str &body body) - `(let ((*standard-input* (make-string-input-stream ,str))) - ,@body)) - -(def-suite lex-tests - :description "Test functions exported from the lexer." - :in all-tests) - -(in-suite lex-tests) - -(test read-token-reads-eof - (read-this "" - (is (not (lex:read-token))))) - -(test read-token-reads-nl - (read-this " -" - (is (eq (lex:read-token) 'lex::nl)))) - -(test read-token-reads-left-paren - (read-this "." - (is (eq (lex:read-token) 'lex::period)))) - -(test read-token-reads-left-paren - (read-this "(" - (is (eq (lex:read-token) 'lex::left-paren)))) - -(test read-token-reads-right-paren - (read-this ")" - (is (eq (lex:read-token) 'lex::right-paren)))) - -(test read-token-reads-left-paren - (read-this "$" - (is (eq (lex:read-token) 'lex::dollar)))) - -(test read-token-reads-plus - (read-this "+" - (is (eq (lex:read-token) 'lex::plus)))) - -(test read-token-reads-minus - (read-this "-" - (is (eq (lex:read-token) 'lex::minus)))) - -(test read-token-ignores-space - (read-this " (" - (is (eq (lex:read-token) 'lex::left-paren)))) - -(test read-token-ignores-tab - (read-this " (" - (is (eq (lex:read-token) 'lex::left-paren)))) - -(test read-token-ignores-comment - (read-this "; this is a comment -(" - (is (eq (lex:read-token) 'lex::nl)))) - -(test read-token-immediate-zero - (read-this "0" - (is (= (lex:read-token) 0)))) - -(test read-token-immediate-all-digits - (read-this "123456789" - (is (= (lex:read-token) 123456789)))) - -(test read-token-immediate-binary - (read-this "0b00101010" - (is (= (lex:read-token) 42)))) - -(test read-token-immediate-octal - (read-this "0o052" - (is (= (lex:read-token) 42)))) - -(test read-token-immediate-hexadecimal - (read-this "0x200" - (is (= (lex:read-token) 512)))) - -(test read-token-immediate-invalid-immediate - (handler-case - (progn (read-this "0v0" (lex:read-token)) - (fail)) - (lex:lexer-error ()))) - -;; do we want a custom error for this too? -(test read-token-immediate-radix - (handler-case - (progn (read-this "0x" (lex:read-token)) - (fail)) - (sb-int:simple-parse-error ()))) - -(test read-token-keyword-single - (read-this "a" - (is (string= (lex:read-token) "a")))) - -(test read-token-keyword-add - (read-this "addi" - (is (string= (lex:read-token) "addi")))) - -(test read-token-immediate-invalid-keyword - (handler-case - (progn (read-this "sub0" (lex:read-token)) - (fail)) - (lex:lexer-error ()))) diff --git a/t/parse.lisp b/t/parse.lisp index 77db380..a9de6db 100644 --- a/t/parse.lisp +++ b/t/parse.lisp @@ -37,22 +37,26 @@ (is (equal '(parse::r "CMP" (parse::rr 1) (parse::rr 5) (parse::rr 0)) (esrap:parse 'parse:instr "CMP $1 $5")))) -;; (test esrap-i-type-1 -;; (is (equal (list 'parse::i "LOAD" (list 'parse::rr 8) (list 'parse::rr 9) (list 'parse::r 1)) -;; (esrap:parse 'parse:instr "LOAD $8 1($9)")))) +(test esrap-i-type-1 + (is (equal '(parse::i "LOAD" (parse::rr 8) (parse::rr 9) (parse::imm 1)) + (esrap:parse 'parse:instr "LOAD $8 1($9)")))) -;; (test esrap-i-type-2 -;; (is (equal (list 'parse::i "STORE" (list 'parse::rr 3) (list 'parse::rr 5) (list 'parse::rr 3)) -;; (esrap:parse 'parse:instr "STORE $5 3($3)")))) +(test esrap-i-type-2 + (is (equal '(parse::i "STORE" (parse::rr 3) (parse::rr 5) (parse::imm 3)) + (esrap:parse 'parse:instr "STORE $5 3($3)")))) -;; (test esrap-i-type-3 -;; (is (equal (list 'parse::i "ORI" (list 'parse::rr 5) (list 'parse::rr 4) (list 'parse::r 2)) -;; (esrap:parse 'parse:instr "ORI $5 $4 2")))) +(test esrap-i-type-3 + (is (equal '(parse::i "ORI" (parse::rr 5) (parse::rr 4) (parse::imm 2)) + (esrap:parse 'parse:instr "ORI $5 $4 2")))) + +(test esrap-j-type-1 + (is (equal '(parse::j "JMP" (parse::rr 3) (parse::imm 3)) + (esrap:parse 'parse:instr "JMP 3($3)")))) (test esrap-j-type-2 (is (equal '(parse::j "JRL" (parse::rr 0) (parse::l "FOO")) (esrap:parse 'parse:instr "JRL FOO")))) (test esrap-j-type-3 - (is (equal '(parse::j "PUSH" (parse::rr 1) 0) + (is (equal '(parse::j "PUSH" (parse::rr 1) (parse::imm 0)) (esrap:parse 'parse:instr "PUSH $1")))) -- cgit v1.2.3 From e50d44464db7eb8e1c20755c862466ac8f7419b0 Mon Sep 17 00:00:00 2001 From: bd Date: Wed, 9 Apr 2025 23:45:16 -0400 Subject: Properly maintain a hashmap of labels, lots of minor code cleanups --- src/main.lisp | 7 +++-- src/package.lisp | 11 ++++++- src/parse.lisp | 89 ++++++++++++++++++++++++++++++++++---------------------- src/util.lisp | 71 +++++++++++++++++++++++++++++++++++--------- t/parse.lisp | 12 ++++---- 5 files changed, 132 insertions(+), 58 deletions(-) diff --git a/src/main.lisp b/src/main.lisp index 9180d57..be9f69a 100644 --- a/src/main.lisp +++ b/src/main.lisp @@ -1,6 +1,6 @@ (in-package #:rva) -(defparameter *banner* +(defvar *banner* " _/_/ _/_/ _/ _/ _/ _/ _/_/ _/ _/ _/_/_/ _/ @@ -30,7 +30,7 @@ _/_/ _/_/ " (print-splash) (let* ((args (clingon:command-arguments cmd)) (file (car args)) - (emit? (not (clingon:getopt cmd :parse)))) + (emit? (not (clingon:getopt cmd :parse)))) (cond ;; complain about num arguments ((/= (length args) 1) (error "Wrong number of arguments.~%")) @@ -38,8 +38,9 @@ _/_/ _/_/ " (error "The file is not an asm source code file.~%")) (t (let ((str (uiop:read-file-string file))) (if str - (progn (pprint (esrap:parse 'parse:text (string-upcase str))) + (progn (pprint (esrap:parse 'parse:text (string-upcase str))) (terpri) + (maphash #'(lambda (k v) (format t "~A => ~A~%" k v)) util:label-table) (format t "---~%")) (error "The file does not exist, or it could not be opened.~%")) (format t "Nitimur in Vetitum~%")))))) diff --git a/src/package.lisp b/src/package.lisp index 2b01d15..efa617c 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -8,7 +8,16 @@ #:format-as-binary #:insert-in-middle #:iota - #:riffle)) + #:riffle + #:add-variable + #:add-label + #:get-variable + #:get-label + #:label-table + #:r-type + #:i-type + #:j-type + #:mnemonic-loc)) (defpackage #:parse (:use #:cl) diff --git a/src/parse.lisp b/src/parse.lisp index a92eae7..f9ede20 100644 --- a/src/parse.lisp +++ b/src/parse.lisp @@ -1,14 +1,22 @@ (in-package #:parse) +(defparameter line-number 0) + (esrap:defrule space (+ (or #\space #\tab)) (:constant nil)) (esrap:defrule newline - (+ #\newline)) + (+ #\newline) + (:destructure (n) + (declare (ignore n)) + (incf line-number) + nil)) ;;; defines rules to parse an integer in various bases +(defmacro define-number-rule ()) + (esrap:defrule binary (and #\0 #\B (+ (or "0" "1"))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 2))) @@ -19,7 +27,7 @@ (:lambda (list) (parse-integer (esrap:text list) :radix 10))) (esrap:defrule hex (and #\0 #\X (+ (or (esrap:character-ranges (#\0 #\9)) - "A" "B" "C" "D" "E" "F"))) + "A" "B" "C" "D" "E" "F"))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 16))) (esrap:defrule int (or binary octal hex decimal)) @@ -27,7 +35,8 @@ ;;; defines rules to parse an operand (esrap:defrule register (and #\$ (or int reg-id)) - (:lambda (list) (list 'rr (cadr list)))) + (:function cadr) + (:lambda (id) (list 'rr id))) (esrap:defrule dereference (and (esrap:? (or #\+ #\-)) int #\( register #\)) (:destructure (s i1 w1 r w2) @@ -43,22 +52,32 @@ (:lambda (list) (list 'l (esrap:text list)))) (esrap:defrule label-decl (and label #\:) - (:destructure (l w) - (declare (ignore w)) - l)) + (:function car) + (:lambda (l) + (util:add-label l line-number) + ;; this line isn't in the final program + (decf line-number) + nil)) ;;; defines rules to parse instruction types -(esrap:defrule r-type-1-m (or "ADDV" "SUBV" "MULV" "DIVV" "ADD" "SUB" "MUL" - "QUOT" "REM" "SFTR" "SFTL" "AND" "OR" "NOT" "XOR" )) -(esrap:defrule r-type-2-m "NOT") -(esrap:defrule r-type-3-m (or "CMP" "CEV")) -(esrap:defrule i-type-1-m (or "LOADV" "LOAD")) -(esrap:defrule i-type-2-m (or "STOREV" "STORE")) -(esrap:defrule i-type-3-m (or "ADDI" "SUBI" "SFTRI" "SFTLI" "ANDI" "ORI" "XORI")) -(esrap:defrule j-type-1-m (or "JMP" "JAL")) -(esrap:defrule j-type-2-m (or "JRL" "BEQ" "BGT" "BUF" "BOF")) -(esrap:defrule j-type-3-m (or "PUSH" "POP")) +(defun generate-mnemonic (name ops) + (let ((expr `(or ,@ops))) + (esrap:add-rule + name (make-instance 'esrap:rule :expression expr)))) + +;; define special cases first +(generate-mnemonic 'r-type-1-m '("NOT")) +(generate-mnemonic 'r-type-2-m '("CMP" "CEV")) +(generate-mnemonic 'i-type-1-m '("LOADV" "LOAD")) +(generate-mnemonic 'i-type-2-m '("STOREV" "STORE")) +(generate-mnemonic 'j-type-1-m '("JMP" "JAL")) +(generate-mnemonic 'j-type-2-m '("PUSH" "POP")) + +;; we need to reverse to ensure rules like "ADDV" are matched before "ADD" +(generate-mnemonic 'r-type-3-m (reverse util:r-type)) +(generate-mnemonic 'i-type-3-m (reverse util:i-type)) +(generate-mnemonic 'j-type-3-m (reverse util:j-type)) (defmacro defrule-instr (name type-id order &rest destructure-pattern) "Defines the boilerplate for a common esrap instruction rule. @@ -67,17 +86,19 @@ TYPE-ID is the symbol which appears as the first element of a successful parse. ORDER is the order to place the parsed tokens in the resulting list. DESTRUCTURE-PATTERN is the list of non-terminals on the right side of the grammar rule." (let* ((pattern-size (length destructure-pattern)) - (spaces (mapcar (lambda (x) (read-from-string (format nil "w~A" x))) (util:iota pattern-size))) - (vars (mapcar (lambda (x) (read-from-string (format nil "s~A" x))) (util:iota pattern-size)))) + (spaces (mapcar (lambda (x) (read-from-string (format nil "w~A" x))) (util:iota pattern-size))) + (vars (mapcar (lambda (x) (read-from-string (format nil "s~A" x))) (util:iota pattern-size)))) `(esrap:defrule ,name - (and ,(read-from-string (format nil "~A-m" name)) ,@(util:riffle (make-list pattern-size :initial-element 'space) destructure-pattern)) + (and ,(read-from-string (format nil "~A-m" name)) ,@(util:riffle (make-list pattern-size :initial-element 'space) destructure-pattern)) (:destructure (m ,@(util:riffle spaces vars)) - (declare (ignore ,@spaces)) - (list ,type-id m ,@(mapcar (lambda (x) (or (nth x vars) ''(rr 0))) order)))))) + (declare (ignore ,@spaces)) + (list ,type-id m ,@(mapcar (lambda (x) (or (nth x vars) ''(rr 0))) order)))))) -(defrule-instr r-type-1 'r (1 2 0) register register register) -(defrule-instr r-type-2 'r (1 2 0) register register) -(defrule-instr r-type-3 'r (0 1 2) register register) +(defrule-instr r-type-1 'r (1 2 0) register register) +(defrule-instr r-type-2 'r (0 1 2) register register) +(defrule-instr r-type-3 'r (1 2 0) register register register) +(defrule-instr i-type-3 'i (0 1 2) register register immediate) +(defrule-instr j-type-3 'j (1 0) label) (esrap:defrule i-type-1 (and i-type-1-m space register space dereference) (:destructure (m w1 s w2 di) @@ -89,30 +110,28 @@ DESTRUCTURE-PATTERN is the list of non-terminals on the right side of the gramma (declare (ignore w1 w2)) `(i ,m ,@(util:insert-in-middle di s)))) -(defrule-instr i-type-3 'i (0 1 2) register register immediate) (esrap:defrule j-type-1 (and j-type-1-m space dereference) (:destructure (m w di) (declare (ignore w)) `(j ,m ,@di))) -(defrule-instr j-type-2 'j (1 0) label) -(esrap:defrule j-type-3 (and j-type-3-m space register) +(esrap:defrule j-type-2 (and j-type-2-m space register) (:destructure (m w r) (declare (ignore w)) `(j ,m ,r (imm 0)))) (esrap:defrule instr (or r-type-1 r-type-2 r-type-3 i-type-1 i-type-2 - i-type-3 j-type-1 j-type-2 j-type-3 label-decl)) + i-type-3 j-type-1 j-type-2 j-type-3 label-decl)) ;;; defines rules to parse the .text segment (esrap:defrule instr-clean (and (esrap:? space) instr newline) - (:destructure (w1 i w2) - (declare (ignore nl l)) - i)) + (:function cadr)) (esrap:defrule text (and ".TEXT" newline - (* instr-clean)) - (:destructure (txt nl is) - (declare (ignore txt nl)) - (list 'text is))) + (* instr-clean)) + (:function caddr) + (:lambda (instr) + `(text ,@(remove nil instr)))) + +;;; defines rules to parse the .data segment diff --git a/src/util.lisp b/src/util.lisp index 93db659..d8bd227 100644 --- a/src/util.lisp +++ b/src/util.lisp @@ -4,7 +4,10 @@ "Returns t if FILE is extended with .asm, nil otherwise." (string= (pathname-type file) "asm")) -;; TODO this won't work for negative numbers of odd sizes quite yet. +(defun fits-in-X-bits (n) + "Returns the number of bits required to represent N" + (ceiling (/ (log (ceiling n (log 2))) (log 2)))) + (defun format-as-binary (num len) "Formats NUM as a binary number, and pads to LEN with zeros." (declare (type number num)) @@ -18,26 +21,68 @@ "Generates a number sequence from 0 to N." (when (> n 0) (do ((i 0 (1+ i)) - (item 0 (1+ item)) - (result nil (push item result))) - ((= i n) (nreverse result))))) + (item 0 (1+ item)) + (result nil (push item result))) + ((= i n) (nreverse result))))) (defun riffle (lst1 lst2) - "Given LST1 and LST2, returns a new list which is the an alternative sequence + "Given LST1 and LST2, returns a new list which is the an alternating sequence of the elements from both lists. Returns nil if the lists are not equal size." (when (eq (length lst1) (length lst2)) (loop for l1 in lst1 - for l2 in lst2 - append (list l1 l2)))) + for l2 in lst2 + append (list l1 l2)))) + +(defvar variable-table (make-hash-table :test #'equal)) +(defvar label-table (make-hash-table :test #'equal)) + +(defun add-variable (name value) + (if (gethash name variable-table) + (error "~@" name) + (setf (gethash name variable-table) value))) + +(defun add-label (name value) + (if (gethash name label-table) + (error "~@