diff options
author | bd <bdunahu@operationnull.com> | 2025-04-09 23:45:16 -0400 |
---|---|---|
committer | bd <bdunahu@operationnull.com> | 2025-04-09 23:45:16 -0400 |
commit | e50d44464db7eb8e1c20755c862466ac8f7419b0 (patch) | |
tree | 495e62cb00fbb67f418c50cb96074fb638126877 | |
parent | ce80845b5cb5b473327e4e561342490576d83a90 (diff) |
Properly maintain a hashmap of labels, lots of minor code cleanups
-rw-r--r-- | src/main.lisp | 7 | ||||
-rw-r--r-- | src/package.lisp | 11 | ||||
-rw-r--r-- | src/parse.lisp | 89 | ||||
-rw-r--r-- | src/util.lisp | 71 | ||||
-rw-r--r-- | t/parse.lisp | 12 |
5 files changed, 132 insertions, 58 deletions
diff --git a/src/main.lisp b/src/main.lisp index 9180d57..be9f69a 100644 --- a/src/main.lisp +++ b/src/main.lisp @@ -1,6 +1,6 @@ (in-package #:rva) -(defparameter *banner* +(defvar *banner* " _/_/ _/_/ _/ _/ _/ _/ _/_/ _/ _/ _/_/_/ _/ @@ -30,7 +30,7 @@ _/_/ _/_/ " (print-splash) (let* ((args (clingon:command-arguments cmd)) (file (car args)) - (emit? (not (clingon:getopt cmd :parse)))) + (emit? (not (clingon:getopt cmd :parse)))) (cond ;; complain about num arguments ((/= (length args) 1) (error "Wrong number of arguments.~%")) @@ -38,8 +38,9 @@ _/_/ _/_/ " (error "The file is not an asm source code file.~%")) (t (let ((str (uiop:read-file-string file))) (if str - (progn (pprint (esrap:parse 'parse:text (string-upcase str))) + (progn (pprint (esrap:parse 'parse:text (string-upcase str))) (terpri) + (maphash #'(lambda (k v) (format t "~A => ~A~%" k v)) util:label-table) (format t "---~%")) (error "The file does not exist, or it could not be opened.~%")) (format t "Nitimur in Vetitum~%")))))) diff --git a/src/package.lisp b/src/package.lisp index 2b01d15..efa617c 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -8,7 +8,16 @@ #:format-as-binary #:insert-in-middle #:iota - #:riffle)) + #:riffle + #:add-variable + #:add-label + #:get-variable + #:get-label + #:label-table + #:r-type + #:i-type + #:j-type + #:mnemonic-loc)) (defpackage #:parse (:use #:cl) diff --git a/src/parse.lisp b/src/parse.lisp index a92eae7..f9ede20 100644 --- a/src/parse.lisp +++ b/src/parse.lisp @@ -1,14 +1,22 @@ (in-package #:parse) +(defparameter line-number 0) + (esrap:defrule space (+ (or #\space #\tab)) (:constant nil)) (esrap:defrule newline - (+ #\newline)) + (+ #\newline) + (:destructure (n) + (declare (ignore n)) + (incf line-number) + nil)) ;;; defines rules to parse an integer in various bases +(defmacro define-number-rule ()) + (esrap:defrule binary (and #\0 #\B (+ (or "0" "1"))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 2))) @@ -19,7 +27,7 @@ (:lambda (list) (parse-integer (esrap:text list) :radix 10))) (esrap:defrule hex (and #\0 #\X (+ (or (esrap:character-ranges (#\0 #\9)) - "A" "B" "C" "D" "E" "F"))) + "A" "B" "C" "D" "E" "F"))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 16))) (esrap:defrule int (or binary octal hex decimal)) @@ -27,7 +35,8 @@ ;;; defines rules to parse an operand (esrap:defrule register (and #\$ (or int reg-id)) - (:lambda (list) (list 'rr (cadr list)))) + (:function cadr) + (:lambda (id) (list 'rr id))) (esrap:defrule dereference (and (esrap:? (or #\+ #\-)) int #\( register #\)) (:destructure (s i1 w1 r w2) @@ -43,22 +52,32 @@ (:lambda (list) (list 'l (esrap:text list)))) (esrap:defrule label-decl (and label #\:) - (:destructure (l w) - (declare (ignore w)) - l)) + (:function car) + (:lambda (l) + (util:add-label l line-number) + ;; this line isn't in the final program + (decf line-number) + nil)) ;;; defines rules to parse instruction types -(esrap:defrule r-type-1-m (or "ADDV" "SUBV" "MULV" "DIVV" "ADD" "SUB" "MUL" - "QUOT" "REM" "SFTR" "SFTL" "AND" "OR" "NOT" "XOR" )) -(esrap:defrule r-type-2-m "NOT") -(esrap:defrule r-type-3-m (or "CMP" "CEV")) -(esrap:defrule i-type-1-m (or "LOADV" "LOAD")) -(esrap:defrule i-type-2-m (or "STOREV" "STORE")) -(esrap:defrule i-type-3-m (or "ADDI" "SUBI" "SFTRI" "SFTLI" "ANDI" "ORI" "XORI")) -(esrap:defrule j-type-1-m (or "JMP" "JAL")) -(esrap:defrule j-type-2-m (or "JRL" "BEQ" "BGT" "BUF" "BOF")) -(esrap:defrule j-type-3-m (or "PUSH" "POP")) +(defun generate-mnemonic (name ops) + (let ((expr `(or ,@ops))) + (esrap:add-rule + name (make-instance 'esrap:rule :expression expr)))) + +;; define special cases first +(generate-mnemonic 'r-type-1-m '("NOT")) +(generate-mnemonic 'r-type-2-m '("CMP" "CEV")) +(generate-mnemonic 'i-type-1-m '("LOADV" "LOAD")) +(generate-mnemonic 'i-type-2-m '("STOREV" "STORE")) +(generate-mnemonic 'j-type-1-m '("JMP" "JAL")) +(generate-mnemonic 'j-type-2-m '("PUSH" "POP")) + +;; we need to reverse to ensure rules like "ADDV" are matched before "ADD" +(generate-mnemonic 'r-type-3-m (reverse util:r-type)) +(generate-mnemonic 'i-type-3-m (reverse util:i-type)) +(generate-mnemonic 'j-type-3-m (reverse util:j-type)) (defmacro defrule-instr (name type-id order &rest destructure-pattern) "Defines the boilerplate for a common esrap instruction rule. @@ -67,17 +86,19 @@ TYPE-ID is the symbol which appears as the first element of a successful parse. ORDER is the order to place the parsed tokens in the resulting list. DESTRUCTURE-PATTERN is the list of non-terminals on the right side of the grammar rule." (let* ((pattern-size (length destructure-pattern)) - (spaces (mapcar (lambda (x) (read-from-string (format nil "w~A" x))) (util:iota pattern-size))) - (vars (mapcar (lambda (x) (read-from-string (format nil "s~A" x))) (util:iota pattern-size)))) + (spaces (mapcar (lambda (x) (read-from-string (format nil "w~A" x))) (util:iota pattern-size))) + (vars (mapcar (lambda (x) (read-from-string (format nil "s~A" x))) (util:iota pattern-size)))) `(esrap:defrule ,name - (and ,(read-from-string (format nil "~A-m" name)) ,@(util:riffle (make-list pattern-size :initial-element 'space) destructure-pattern)) + (and ,(read-from-string (format nil "~A-m" name)) ,@(util:riffle (make-list pattern-size :initial-element 'space) destructure-pattern)) (:destructure (m ,@(util:riffle spaces vars)) - (declare (ignore ,@spaces)) - (list ,type-id m ,@(mapcar (lambda (x) (or (nth x vars) ''(rr 0))) order)))))) + (declare (ignore ,@spaces)) + (list ,type-id m ,@(mapcar (lambda (x) (or (nth x vars) ''(rr 0))) order)))))) -(defrule-instr r-type-1 'r (1 2 0) register register register) -(defrule-instr r-type-2 'r (1 2 0) register register) -(defrule-instr r-type-3 'r (0 1 2) register register) +(defrule-instr r-type-1 'r (1 2 0) register register) +(defrule-instr r-type-2 'r (0 1 2) register register) +(defrule-instr r-type-3 'r (1 2 0) register register register) +(defrule-instr i-type-3 'i (0 1 2) register register immediate) +(defrule-instr j-type-3 'j (1 0) label) (esrap:defrule i-type-1 (and i-type-1-m space register space dereference) (:destructure (m w1 s w2 di) @@ -89,30 +110,28 @@ DESTRUCTURE-PATTERN is the list of non-terminals on the right side of the gramma (declare (ignore w1 w2)) `(i ,m ,@(util:insert-in-middle di s)))) -(defrule-instr i-type-3 'i (0 1 2) register register immediate) (esrap:defrule j-type-1 (and j-type-1-m space dereference) (:destructure (m w di) (declare (ignore w)) `(j ,m ,@di))) -(defrule-instr j-type-2 'j (1 0) label) -(esrap:defrule j-type-3 (and j-type-3-m space register) +(esrap:defrule j-type-2 (and j-type-2-m space register) (:destructure (m w r) (declare (ignore w)) `(j ,m ,r (imm 0)))) (esrap:defrule instr (or r-type-1 r-type-2 r-type-3 i-type-1 i-type-2 - i-type-3 j-type-1 j-type-2 j-type-3 label-decl)) + i-type-3 j-type-1 j-type-2 j-type-3 label-decl)) ;;; defines rules to parse the .text segment (esrap:defrule instr-clean (and (esrap:? space) instr newline) - (:destructure (w1 i w2) - (declare (ignore nl l)) - i)) + (:function cadr)) (esrap:defrule text (and ".TEXT" newline - (* instr-clean)) - (:destructure (txt nl is) - (declare (ignore txt nl)) - (list 'text is))) + (* instr-clean)) + (:function caddr) + (:lambda (instr) + `(text ,@(remove nil instr)))) + +;;; defines rules to parse the .data segment diff --git a/src/util.lisp b/src/util.lisp index 93db659..d8bd227 100644 --- a/src/util.lisp +++ b/src/util.lisp @@ -4,7 +4,10 @@ "Returns t if FILE is extended with .asm, nil otherwise." (string= (pathname-type file) "asm")) -;; TODO this won't work for negative numbers of odd sizes quite yet. +(defun fits-in-X-bits (n) + "Returns the number of bits required to represent N" + (ceiling (/ (log (ceiling n (log 2))) (log 2)))) + (defun format-as-binary (num len) "Formats NUM as a binary number, and pads to LEN with zeros." (declare (type number num)) @@ -18,26 +21,68 @@ "Generates a number sequence from 0 to N." (when (> n 0) (do ((i 0 (1+ i)) - (item 0 (1+ item)) - (result nil (push item result))) - ((= i n) (nreverse result))))) + (item 0 (1+ item)) + (result nil (push item result))) + ((= i n) (nreverse result))))) (defun riffle (lst1 lst2) - "Given LST1 and LST2, returns a new list which is the an alternative sequence + "Given LST1 and LST2, returns a new list which is the an alternating sequence of the elements from both lists. Returns nil if the lists are not equal size." (when (eq (length lst1) (length lst2)) (loop for l1 in lst1 - for l2 in lst2 - append (list l1 l2)))) + for l2 in lst2 + append (list l1 l2)))) + +(defvar variable-table (make-hash-table :test #'equal)) +(defvar label-table (make-hash-table :test #'equal)) + +(defun add-variable (name value) + (if (gethash name variable-table) + (error "~@<Variable declared twice: ~S.~@:>" name) + (setf (gethash name variable-table) value))) + +(defun add-label (name value) + (if (gethash name label-table) + (error "~@<Label declared twice: ~S.~@:>" name) + (setf (gethash name label-table) value))) -(defparameter type-r - '(ADD SUB MUL QUOT REM SFTR SFTL AND OR NOT XOR ADDV SUBV MULV DIVV CMP CEV) +(defun get-variable (name) + (alexandria:if-let ((value (gethash name variable-table))) + value + (error "~@<Variable ~S not declared.~@:>" name))) + +(defun get-label (name) + (alexandria:if-let ((value (gethash name label-table))) + value + (error "~@<Label ~S not found.~@:>" name))) + +(defmacro generate-type-map (ops) + "Generates an alist where the key corresponds to an element in +OPS, while the value is the index of that key (padded to the minimum +number of bits required to represent all +concatenated with TYPE." + `(let ((i 0) + (opsize (fits-in-X-bits (length ,ops)))) + (mapcar (lambda (x) + (incf i) + (cons x (format-as-binary i opsize))) + ,ops))) + +(defvar r-type + '("ADD" "SUB" "MUL" "QUOT" "REM" "SFTR" "SFTL" "AND" "OR" + "NOT" "XOR" "ADDV" "SUBV" "MULV" "DIVV" "CMP" "CEV") "R-type instructions.") -(defparameter type-i - '(LOAD LOADV ADDI SUBI SFTRI SFTLI ANDI ORI XORI STORE STOREV MOV) +(defvar i-type + '("LOAD" "LOADV" "ADDI" "SUBI" "SFTRI" "SFTLI" "ANDI" "ORI" "XORI" "STORE" "STOREV") "I-type instructions.") -(defparameter type-j - '(JMP JRL JAL BEQ BGT BUF BOF PUSH POP) +(defvar j-type + '("JMP" "JRL" "JAL" "BEQ" "BGT" "BUF" "BOF" "PUSH" "POP") "J-type instructions.") + +(defvar mnemonic-loc + `(,@(generate-type-map r-type) + ,@(generate-type-map i-type) + ,@(generate-type-map j-type)) + "An alist mapping known mnemonics to their binary representation.") diff --git a/t/parse.lisp b/t/parse.lisp index a9de6db..b32263a 100644 --- a/t/parse.lisp +++ b/t/parse.lisp @@ -15,15 +15,15 @@ (test esrap-register-binary-ten (is (equal (list 'parse::rr 10) - (esrap:parse 'parse::register "$0b1010")))) + (esrap:parse 'parse::register "$0B1010")))) (test esrap-register-octal-ten (is (equal (list 'parse::rr 10) - (esrap:parse 'parse::register "$0o12")))) + (esrap:parse 'parse::register "$0O12")))) (test esrap-register-hex-ten (is (equal (list 'parse::rr 10) - (esrap:parse 'parse::register "$0xa")))) + (esrap:parse 'parse::register "$0XA")))) (test esrap-r-type-1 (is (equal '(parse::r "ADD" (parse::rr 5) (parse::rr 8) (parse::rr 1)) @@ -51,12 +51,12 @@ (test esrap-j-type-1 (is (equal '(parse::j "JMP" (parse::rr 3) (parse::imm 3)) - (esrap:parse 'parse:instr "JMP 3($3)")))) + (esrap:parse 'parse:instr "JMP 3($3)")))) (test esrap-j-type-2 (is (equal '(parse::j "JRL" (parse::rr 0) (parse::l "FOO")) - (esrap:parse 'parse:instr "JRL FOO")))) + (esrap:parse 'parse:instr "JRL FOO")))) (test esrap-j-type-3 (is (equal '(parse::j "PUSH" (parse::rr 1) (parse::imm 0)) - (esrap:parse 'parse:instr "PUSH $1")))) + (esrap:parse 'parse:instr "PUSH $1")))) |