From 6e338215192c26dfb16236398ca8e3762a8d4d0e Mon Sep 17 00:00:00 2001 From: bd Date: Thu, 20 Mar 2025 13:51:28 -0400 Subject: Add logic to open file, lex single character symbols, tests --- src/package.lisp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/package.lisp') diff --git a/src/package.lisp b/src/package.lisp index 9d21293..44399cb 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -5,3 +5,9 @@ (defpackage #:util (:use #:cl) (:export #:asm-extension?)) + +(defpackage #:lex + (:use #:cl) + (:export #:file->tokens + ;; exported for testing only + #:read-token)) -- cgit v1.2.3 From 4d8ffb6a29285f12d9ebd788727d633550510d7f Mon Sep 17 00:00:00 2001 From: bd Date: Thu, 3 Apr 2025 01:07:13 -0400 Subject: Add lexing for immediates, keywords, and associated tests --- src/lex.lisp | 57 +++++++++++++++++++++++++++++++++++++++++++++----------- src/main.lisp | 9 +++++---- src/package.lisp | 3 ++- t/lex.lisp | 46 ++++++++++++++++++++++++++++++++++++--------- 4 files changed, 90 insertions(+), 25 deletions(-) (limited to 'src/package.lisp') diff --git a/src/lex.lisp b/src/lex.lisp index ad386ba..c6e9cf7 100644 --- a/src/lex.lisp +++ b/src/lex.lisp @@ -1,19 +1,34 @@ (in-package #:lex) +(define-condition invalid-immediate-or-keyword (error) + ((chr :initarg :chr + :initform nil + :reader chr) + (instance :initarg :instance + :initform nil + :reader instance)) + (:report (lambda (condition stream) + (format stream + "Lex failed--encountered ~a while reading ~a." + (chr condition) (instance condition)))) + (:documentation "Dedicated error for immediates/keywords which contain +invalid characters.")) + (defun file->tokens (file) "Opens FILE and parses returns a list of tokens, or NIL if the file could not be opened." - (defun read-tokens (tokens-so-far) - "Collects tokens in FILE into TOKENS-SO-FAR." + (defun read-instr (lst tokens-so-far) + "Collects tokens in FILE into TOKENS-SO-FAR, splitting on a newline." (let ((token (read-token))) - (if token - (read-tokens (cons token tokens-so-far)) - (reverse tokens-so-far)))) + (cond ((null token) (reverse tokens-so-far)) + ((eq token 'nl) + (cons (reverse tokens-so-far) (read-instr nil nil))) + (t (read-instr lst (cons token tokens-so-far)))))) (and (probe-file file) (with-open-file (*standard-input* file :direction :input) - (read-tokens '())))) + (remove nil (read-instr '() '()))))) (defun read-token () "Reads *STANDARD-INPUT* and returns a token, or nil if the end @@ -23,29 +38,49 @@ Comments start with a semi-colon ';' and all tokens after are ignored." (let ((chr (read-char *standard-input* nil))) (cond ((null chr) chr) + + ((char= chr #\linefeed) 'nl) + ((whitespace-char-p chr) (read-token)) ((char= chr #\;) (progn (read-line *standard-input* nil) - (read-token))) + 'nl)) ((char= chr #\() 'left-paren) ((char= chr #\)) 'right-paren) + ((char= chr #\:) 'colon) + ((char= chr #\$) 'dollar) + ((digit-char-p chr) (read-immediate chr)) ((alpha-char-p chr) - (read-identifier chr)) + (read-keyword chr)) (t (error (format nil "~a is not a valid lexical symbol.~%" chr)))))) (defun read-immediate (chr) - 'immediate) + (defun read-immediate-helper (chrs-so-far) + (let ((chr (peek-char nil *standard-input* nil))) + (cond ((and (not (null chr)) (digit-char-p chr)) + (read-immediate-helper (cons (read-char *standard-input* nil) chrs-so-far))) + ((and (not (null chr)) (alpha-char-p chr)) + (error 'invalid-immediate-or-keyword :chr chr :instance "immediate")) + (t (reverse chrs-so-far))))) + (parse-integer (coerce (read-immediate-helper (list chr)) 'string))) -(defun read-identifier (chr) - 'id) +(defun read-keyword (chr) + (defun read-keyword-helper (chrs-so-far) + (let ((chr (peek-char nil *standard-input* nil))) + (cond ((and (not (null chr)) (alpha-char-p chr)) + (read-keyword-helper (cons (read-char *standard-input* nil) chrs-so-far))) + ((and (not (null chr)) (digit-char-p chr)) + (error 'invalid-immediate-or-keyword :chr chr :instance "keyword")) + (t (reverse chrs-so-far))))) + (coerce (read-keyword-helper (list chr)) 'string)) (defun whitespace-char-p (x) (or (char= #\space x) diff --git a/src/main.lisp b/src/main.lisp index 98176ec..f6e5754 100644 --- a/src/main.lisp +++ b/src/main.lisp @@ -41,13 +41,14 @@ _/_/ _/_/ " (emit? (not (clingon:getopt cmd :parse)))) (cond ;; complain about num arguments - ((/= (length args) 1) (error "Wrong number of arguments.")) + ((/= (length args) 1) (error "Wrong number of arguments.~%")) ((not (util:asm-extension? file)) - (error "The file is not an asm source code file.")) + (error "The file is not an asm source code file.~%")) (t (let ((tokens (lex:file->tokens file))) (if tokens - (format t "~a" tokens) - (error "The file does not exist, or it could not be opened.")) + (progn (pprint tokens) + (terpri)) + (error "The file does not exist, or it could not be opened.~%")) (format t "Nitimur in Vetitum~%")))))) diff --git a/src/package.lisp b/src/package.lisp index 44399cb..670ed02 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -10,4 +10,5 @@ (:use #:cl) (:export #:file->tokens ;; exported for testing only - #:read-token)) + #:read-token + #:invalid-immediate-or-keyword)) diff --git a/t/lex.lisp b/t/lex.lisp index e210ecb..40698f9 100644 --- a/t/lex.lisp +++ b/t/lex.lisp @@ -14,6 +14,11 @@ (read-this "" (is (not (lex:read-token))))) +(test read-token-reads-nl + (read-this " +" + (is (eq (lex:read-token) 'lex::nl)))) + (test read-token-reads-left-paren (read-this "(" (is (eq (lex:read-token) 'lex::left-paren)))) @@ -22,6 +27,10 @@ (read-this ")" (is (eq (lex:read-token) 'lex::right-paren)))) +(test read-token-reads-left-paren + (read-this "$" + (is (eq (lex:read-token) 'lex::dollar)))) + (test read-token-ignores-space (read-this " (" (is (eq (lex:read-token) 'lex::left-paren)))) @@ -30,16 +39,35 @@ (read-this " (" (is (eq (lex:read-token) 'lex::left-paren)))) -(test read-token-ignores-newline - (read-this " -(" - (is (eq (lex:read-token) 'lex::left-paren)))) - (test read-token-ignores-comment (read-this "; this is a comment (" - (is (eq (lex:read-token) 'lex::left-paren)))) + (is (eq (lex:read-token) 'lex::nl)))) -(test read-token-ignores-comment-eof - (read-this ";" - (is (not (lex:read-token))))) +(test read-token-immediate-zero + (read-this "0" + (is (= (lex:read-token) 0)))) + +(test read-token-immediate-all-digits + (read-this "123456789" + (is (= (lex:read-token) 123456789)))) + +(test read-token-immediate-invalid-immediate + (handler-case + (progn (read-this "0v0" (lex:read-token)) + (fail)) + (lex:invalid-immediate-or-keyword ()))) + +(test read-token-keyword-single + (read-this "a" + (is (string= (lex:read-token) "a")))) + +(test read-token-keyword-add + (read-this "addi" + (is (string= (lex:read-token) "addi")))) + +(test read-token-immediate-invalid-keyword + (handler-case + (progn (read-this "sub0" (lex:read-token)) + (fail)) + (lex:invalid-immediate-or-keyword ()))) -- cgit v1.2.3 From 0fe2cc70abacc7c9e7aa2602836c8226bb1a1dc3 Mon Sep 17 00:00:00 2001 From: bd Date: Mon, 7 Apr 2025 23:58:29 -0400 Subject: Add label processing, mnemonic and label lookup maps --- README.md | 11 ++--------- rva.asd | 4 +++- src/package.lisp | 11 ++++++++++- src/parse.lisp | 21 +++++++++++++++++++++ src/util.lisp | 32 ++++++++++++++++++++++++++++++++ t/parse.lisp | 20 ++++++++++++++++++++ t/util.lisp | 12 ++++++++++++ 7 files changed, 100 insertions(+), 11 deletions(-) create mode 100644 src/parse.lisp create mode 100644 t/parse.lisp (limited to 'src/package.lisp') diff --git a/README.md b/README.md index 58c8201..aa61af6 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,6 @@ # rva - _/_/ _/_/ - _/ _/ - _/ _/ _/_/ _/ _/ _/_/_/ _/ - _/ _/_/ _/ _/ _/ _/ _/ - _/ _/ _/ _/ _/ _/ _/ - _/ _/ _/ _/_/_/ _/ - _/_/ _/_/ - -This is an assembler for the custom ISA nicknamed "RISC V[ECTOR]". It takes in an assembly program syntactically similar to MIPS (see input) and outputs a list of binary numbers corresponding to the instructions. This the output is compatible with the [RISC V[ECTOR]](https://github.com/bdunahu/RISC-V-ECTOR-) simulator. +This is an assembler for a custom ISA nicknamed "RISC V[ECTOR]". It takes in an assembly program syntactically similar to MIPS (see input) and outputs a list of binary numbers corresponding to the instructions. The output is compatible with the [RISC V[ECTOR]](https://github.com/bdunahu/RISC-V-ECTOR-) simulator. ## Dependencies @@ -27,4 +19,5 @@ Run `make` to produce a binary file in `/bin/`. To run the unit tests, run `make # About Created at the University of Massachusetts, Amherst + CS535 -- Computer Architecture and ISA Design \ No newline at end of file diff --git a/rva.asd b/rva.asd index 75300a8..7fdb00b 100644 --- a/rva.asd +++ b/rva.asd @@ -16,6 +16,7 @@ :components ((:file "package") (:file "util") (:file "lex") + (:file "parse") (:file "main")))) :long-description #.(uiop:read-file-string @@ -36,7 +37,8 @@ :components ((:file "package") (:file "main") (:file "util") - (:file "lex")))) + (:file "lex") + (:file "parse")))) :perform (test-op (o s) (uiop:symbol-call :rva-tests :test-rva))) (defmethod asdf:perform ((o asdf:image-op) (c asdf:system)) diff --git a/src/package.lisp b/src/package.lisp index 670ed02..d999783 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -4,7 +4,10 @@ (defpackage #:util (:use #:cl) - (:export #:asm-extension?)) + (:export #:asm-extension? + #:format-as-binary + #:label-loc + #:mnemonic-loc)) (defpackage #:lex (:use #:cl) @@ -12,3 +15,9 @@ ;; exported for testing only #:read-token #:invalid-immediate-or-keyword)) + +(defpackage #:parse + (:use #:cl) + (:export #:tokens->ast + ;; exported for testing only + #:extract-label)) diff --git a/src/parse.lisp b/src/parse.lisp new file mode 100644 index 0000000..8bd8c50 --- /dev/null +++ b/src/parse.lisp @@ -0,0 +1,21 @@ +(in-package #:parse) + +(defun tokens->ast (program) + (let ((program (remove nil (mapcar #'extract-label program)))) + ;; TODO + program)) + +(let ((i 0)) + (defun extract-label (line) + "Given a series of tokens LINE, determines if LINE is +in the form STRING {colon}. If it is, then it is treated as a +label, and pushed onto the stack with the line index. + +Note that this function is intended to be called using mapcar, +so that labels can be added to a map and otherwise removed from +processing." + (trivia:match line + ((list (and id (type string)) + (satisfies (lambda (x) (equal x 'lex::colon)))) + (progn (push (cons (read-from-string id) i) util:label-loc) nil)) + (_ (progn (incf i) line))))) diff --git a/src/util.lisp b/src/util.lisp index 87e4df9..1ea6dfc 100644 --- a/src/util.lisp +++ b/src/util.lisp @@ -3,3 +3,35 @@ (defun asm-extension? (file) "Returns t if FILE is extended with .asm, nil otherwise." (string= (pathname-type file) "asm")) + +;; TODO this won't work for negative numbers of odd sizes quite yet. +(defun format-as-binary (num len) + "Formats NUM as a binary number, and pads to LEN with zeros." + (declare (type number num)) + (declare (type (integer 0 *) len)) + (format nil "~V,'0b" len num)) + +(defmacro generate-type-map (type opsize ops) + "Generates an alist where the key corresponds to an element in +OPS, while the value is the index of that key (padded to OPSIZE) +concatenated with TYPE." + `(let ((i 0)) + (mapcar (lambda (x) + (incf i) + (cons x (concatenate 'string ,type + (format-as-binary i ,opsize)))) + ,ops))) + +(defparameter label-loc '() + "A symbol table mapping label names to line indices.") + +(defparameter mnemonic-loc + `(,@(generate-type-map "00" 5 + '(ADD SUB MUL QUOT REM SFTR SFTL AND OR NOT + XOR ADDV SUBV MULV DIVV CMP CEV)) + ,@(generate-type-map "01" 4 + '(LOAD LOADV ADDI SUBI SFTRI SFTLI ANDI ORI + XORI STORE STOREV)) + ,@(generate-type-map "10" 4 + '(JMP JRL JAL BEQ BGT BUF BOF PUSH POP))) + "An alist mapping known mnemonics to their binary representation.") diff --git a/t/parse.lisp b/t/parse.lisp new file mode 100644 index 0000000..2ab3e76 --- /dev/null +++ b/t/parse.lisp @@ -0,0 +1,20 @@ +(in-package #:rva-tests) + +(def-suite parse-tests + :description "Test functions exported from the parser." + :in all-tests) + +(in-suite parse-tests) + +(test extract-label-is-a-label + (is (not (parse:extract-label '("LOOP" lex::colon))))) + +(test extract-label-not-a-label-one + (let ((lst '("NICE" "TRY"))) + (is (equal lst + (parse:extract-label lst))))) + +(test extract-label-not-a-label-two + (let ((lst '("LOOP" lex::colon lex::colon))) + (is (equal lst + (parse:extract-label lst))))) diff --git a/t/util.lisp b/t/util.lisp index ef59fbb..c2dafab 100644 --- a/t/util.lisp +++ b/t/util.lisp @@ -14,3 +14,15 @@ (test asm-extension?-returns-true-obvious-case (is (util:asm-extension? "quux.asm"))) + +(test format-as-binary-unsigned-no-pad + (is (string= (util:format-as-binary 0 0) + "0"))) + +(test format-as-binary-unsigned-no-pad-fourty-two + (is (string= (util:format-as-binary 42 0) + "101010"))) + +(test format-as-binary-unsigned-pad-fourty-two + (is (string= (util:format-as-binary 42 10) + "0000101010"))) -- cgit v1.2.3 From b85c10ba1c53f1b442fea6bde4c2a2f73cfe5d6b Mon Sep 17 00:00:00 2001 From: bd Date: Tue, 8 Apr 2025 01:56:18 -0400 Subject: Simplify lexer-error handling, skeletion parsing functions for types --- src/lex.lisp | 30 +++++++++++++++--------------- src/package.lisp | 17 ++++++++++------- src/parse.lisp | 42 ++++++++++++++++++++++++++++++++++++++---- src/util.lisp | 32 +++++++++++--------------------- t/lex.lisp | 4 ++-- t/parse.lisp | 14 ++++++++++---- 6 files changed, 86 insertions(+), 53 deletions(-) (limited to 'src/package.lisp') diff --git a/src/lex.lisp b/src/lex.lisp index d5c77a1..5b1457d 100644 --- a/src/lex.lisp +++ b/src/lex.lisp @@ -1,18 +1,12 @@ (in-package #:lex) -(define-condition invalid-immediate-or-keyword (error) - ((chr :initarg :chr - :initform nil - :reader chr) - (instance :initarg :instance - :initform nil - :reader instance)) +(define-condition lexer-error (error) + ((message :initarg :message + :initform nil + :reader message)) (:report (lambda (condition stream) - (format stream - "LEX failed--encountered ~a while reading ~a." - (chr condition) (instance condition)))) - (:documentation "Dedicated error for immediates/keywords which contain -invalid characters.")) + (format stream "~A" (message condition)))) + (:documentation "Dedicated error for an invalid lex.")) (defun file->tokens (file) "Opens FILE and parses returns a list of tokens, or @@ -63,7 +57,9 @@ Comments start with a semi-colon ';' and all tokens after are ignored." ((alpha-char-p chr) (read-keyword chr)) - (t (error (format nil "~a is not a valid lexical symbol.~%" chr)))))) + (t (error 'lexer-error + :message + (format nil "LEX failled--~a is not a valid lexical symbol.~%" chr)))))) (defun read-immediate (chr) "Reads a sequence of digits, in base 2, 8, 10, or 16.. Throws @@ -74,7 +70,9 @@ Comments start with a semi-colon ';' and all tokens after are ignored." (cond ((and (not (null chr)) (digit-char-p chr)) (read-immediate-helper (cons (read-char *standard-input* nil) chrs-so-far))) ((and (not (null chr)) (alpha-char-p chr)) - (error 'invalid-immediate-or-keyword :chr chr :instance "immediate")) + (error 'lexer-error + :message + (format nil "LEX failed--encountered ~a while reading immediate.~%" chr))) (t (reverse chrs-so-far))))) (let* ((next (peek-char nil *standard-input* nil)) @@ -99,7 +97,9 @@ error if a digit is encountered." (cond ((and (not (null chr)) (alpha-char-p chr)) (read-keyword-helper (cons (read-char *standard-input* nil) chrs-so-far))) ((and (not (null chr)) (digit-char-p chr)) - (error 'invalid-immediate-or-keyword :chr chr :instance "keyword")) + (error 'lexer-error + :message + (format nil "LEX failed--encountered ~a while reading keyword.~%" chr))) (t (reverse chrs-so-far))))) (coerce (read-keyword-helper (list chr)) 'string)) diff --git a/src/package.lisp b/src/package.lisp index d999783..3364856 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -1,4 +1,4 @@ -(defpackage #:rva +helper(defpackage #:rva (:use #:cl) (:export #:main)) @@ -6,18 +6,21 @@ (:use #:cl) (:export #:asm-extension? #:format-as-binary - #:label-loc - #:mnemonic-loc)) + #:type-r + #:type-i + #:type-j + #:label-loc)) (defpackage #:lex (:use #:cl) - (:export #:file->tokens + (:export #:lexer-error + #:file->tokens ;; exported for testing only - #:read-token - #:invalid-immediate-or-keyword)) + #:read-token)) (defpackage #:parse (:use #:cl) - (:export #:tokens->ast + (:export #:parser-error + #:tokens->ast ;; exported for testing only #:extract-label)) diff --git a/src/parse.lisp b/src/parse.lisp index 8bd8c50..3052583 100644 --- a/src/parse.lisp +++ b/src/parse.lisp @@ -1,9 +1,20 @@ -(in-package #:parse) +helper(in-package #:parse) + +(define-condition parser-error (error) + ((message :initarg :message + :initform nil + :reader message)) + (:report (lambda (condition stream) + (format stream "~A" (message condition)))) + (:documentation "Dedicated error for an invalid parse.")) (defun tokens->ast (program) - (let ((program (remove nil (mapcar #'extract-label program)))) - ;; TODO - program)) + "Given PROGRAM, which is a list of lists of symbols, +filters out the labels and parses." + ;; TODO add directives + (let ((program (remove nil (mapcar #'extract-label program))) + (i 0)) + (mapcar (lambda (l) (extract-instruction l i)) program))) (let ((i 0)) (defun extract-label (line) @@ -19,3 +30,26 @@ processing." (satisfies (lambda (x) (equal x 'lex::colon)))) (progn (push (cons (read-from-string id) i) util:label-loc) nil)) (_ (progn (incf i) line))))) + +(defun extract-instruction (line i) + "Given instruction LINE, determines the expected type format and passes +LINE and the index I to the the respective function." + ;; TODO add pseudo-ops (i.e., nop, mov, ...) + (let* ((type-map '((r-type . extract-r-type) + (i-type . extract-i-type) + (j-type . extract-j-type))) + (keyword (car line)) + (type-fn (cdr (assoc keyword type-map)))) + (if type-fn + (funcall type-fn line i) + (error 'parser-error + (format nil "PARSE failed--~a is not a known keyword.~%" (keyword)))))) + +(defun extract-r-type (line i) + 'r) + +(defun extract-i-type (line i) + 'i) + +(defun extract-j-type (line i) + 'j) diff --git a/src/util.lisp b/src/util.lisp index 1ea6dfc..5edee4a 100644 --- a/src/util.lisp +++ b/src/util.lisp @@ -11,27 +11,17 @@ (declare (type (integer 0 *) len)) (format nil "~V,'0b" len num)) -(defmacro generate-type-map (type opsize ops) - "Generates an alist where the key corresponds to an element in -OPS, while the value is the index of that key (padded to OPSIZE) -concatenated with TYPE." - `(let ((i 0)) - (mapcar (lambda (x) - (incf i) - (cons x (concatenate 'string ,type - (format-as-binary i ,opsize)))) - ,ops))) +(defparameter type-r + '(ADD SUB MUL QUOT REM SFTR SFTL AND OR NOT XOR ADDV SUBV MULV DIVV CMP CEV) + "R-type instructions.") + +(defparameter type-i + '(LOAD LOADV ADDI SUBI SFTRI SFTLI ANDI ORI XORI STORE STOREV) + "I-type instructions.") + +(defparameter type-j + '(JMP JRL JAL BEQ BGT BUF BOF PUSH POP) + "J-type instructions.") (defparameter label-loc '() "A symbol table mapping label names to line indices.") - -(defparameter mnemonic-loc - `(,@(generate-type-map "00" 5 - '(ADD SUB MUL QUOT REM SFTR SFTL AND OR NOT - XOR ADDV SUBV MULV DIVV CMP CEV)) - ,@(generate-type-map "01" 4 - '(LOAD LOADV ADDI SUBI SFTRI SFTLI ANDI ORI - XORI STORE STOREV)) - ,@(generate-type-map "10" 4 - '(JMP JRL JAL BEQ BGT BUF BOF PUSH POP))) - "An alist mapping known mnemonics to their binary representation.") diff --git a/t/lex.lisp b/t/lex.lisp index dfa632a..7a20608 100644 --- a/t/lex.lisp +++ b/t/lex.lisp @@ -76,7 +76,7 @@ (handler-case (progn (read-this "0v0" (lex:read-token)) (fail)) - (lex:invalid-immediate-or-keyword ()))) + (lex:lexer-error ()))) ;; do we want a custom error for this too? (test read-token-immediate-radix @@ -97,4 +97,4 @@ (handler-case (progn (read-this "sub0" (lex:read-token)) (fail)) - (lex:invalid-immediate-or-keyword ()))) + (lex:lexer-error ()))) diff --git a/t/parse.lisp b/t/parse.lisp index 2ab3e76..bd1310f 100644 --- a/t/parse.lisp +++ b/t/parse.lisp @@ -11,10 +11,16 @@ (test extract-label-not-a-label-one (let ((lst '("NICE" "TRY"))) - (is (equal lst - (parse:extract-label lst))))) + (is (equal lst + (parse:extract-label lst))))) (test extract-label-not-a-label-two (let ((lst '("LOOP" lex::colon lex::colon))) - (is (equal lst - (parse:extract-label lst))))) + (is (equal lst + (parse:extract-label lst))))) + +(test extract-line-invalid-type + (handler-case + (progn (parse:tokens->ast '(("foo" LEX::DOLLAR))) + (fail)) + (lex:parser-error ()))) -- cgit v1.2.3