diff options
author | bd <bdunahu@operationnull.com> | 2025-04-09 20:25:57 -0400 |
---|---|---|
committer | bd <bdunahu@operationnull.com> | 2025-04-09 20:25:57 -0400 |
commit | ce80845b5cb5b473327e4e561342490576d83a90 (patch) | |
tree | 90f7189982384c2e4de2f58a3f2daac4adf80514 /src | |
parent | 6ba1871c3825e17d33b96ffd3051239dfe18d61a (diff) |
Successfully parse every instruction type
Diffstat (limited to 'src')
-rw-r--r-- | src/lex.lisp | 110 | ||||
-rw-r--r-- | src/main.lisp | 21 | ||||
-rw-r--r-- | src/package.lisp | 12 | ||||
-rw-r--r-- | src/parse.lisp | 95 |
4 files changed, 59 insertions, 179 deletions
diff --git a/src/lex.lisp b/src/lex.lisp deleted file mode 100644 index c86d17c..0000000 --- a/src/lex.lisp +++ /dev/null @@ -1,110 +0,0 @@ -(in-package #:lex) - -(define-condition lexer-error (error) - ((message :initarg :message - :initform nil - :reader message)) - (:report (lambda (condition stream) - (format stream "~A" (message condition)))) - (:documentation "Dedicated error for an invalid lex.")) - -(defun file->tokens (file) - "Opens FILE and parses returns a list of tokens, or -NIL if the file could not be opened." - - (defun read-instr (lst tokens-so-far) - "Collects tokens in FILE into TOKENS-SO-FAR, splitting on a newline." - (let ((token (read-token))) - (cond ((null token) (reverse tokens-so-far)) - ((eq token 'nl) - (cons (reverse tokens-so-far) (read-instr nil nil))) - (t (read-instr lst (cons token tokens-so-far)))))) - - (and (probe-file file) - (with-open-file (*standard-input* file :direction :input) - (remove nil (read-instr '() '()))))) - -(defun read-token () - "Reads *STANDARD-INPUT* and returns a token, or nil if the end -of file has been reached. -Whitespace, commas, colons, and parentheses are token delimiters. -Comments start with a semi-colon ';' and all tokens after are ignored." - (let ((chr (read-char *standard-input* nil))) - (cond - ((null chr) chr) - - ((char= chr #\linefeed) 'nl) - - ((whitespace-char-p chr) - (read-token)) - - ((char= chr #\;) - (progn (read-line *standard-input* nil) - 'nl)) - - ((char= chr #\.) 'period) - - ((char= chr #\() 'left-paren) - ((char= chr #\)) 'right-paren) - - ((char= chr #\:) 'colon) - ((char= chr #\$) 'dollar) - - ((char= chr #\+) 'plus) - ((char= chr #\-) 'minus) - - ((digit-char-p chr) - (read-immediate chr)) - - ((alpha-char-p chr) - (read-keyword chr)) - - (t (error 'lexer-error - :message - (format nil "LEX failled--~a is not a valid lexical symbol.~%" chr)))))) - -(defun read-immediate (chr) - "Reads a sequence of digits, in base 2, 8, 10, or 16.. Throws -`invalid-immediate-or-keyword' error if an alphabetic character is encountered." - ;; may be combined with read-keyword-helper - (defun read-immediate-helper (chrs-so-far) - (let ((chr (peek-char nil *standard-input* nil))) - (cond ((and (not (null chr)) (digit-char-p chr)) - (read-immediate-helper (cons (read-char *standard-input* nil) chrs-so-far))) - ((and (not (null chr)) (alpha-char-p chr)) - (error 'lexer-error - :message - (format nil "LEX failed--encountered ~a while reading immediate.~%" chr))) - (t (reverse chrs-so-far))))) - - (let* ((next (peek-char nil *standard-input* nil)) - (radix (cond ((null next) 10) - ((char= next #\b) 2) - ((char= next #\o) 8) - ((char= next #\x) 16) - ((alpha-char-p next) nil) - (t 10))) - (arg (list chr))) - (when (and (char= chr #\0) radix (not (= radix 10))) - (read-char *standard-input* nil) - (setq arg '())) - (parse-integer (coerce (read-immediate-helper arg) 'string) :radix radix))) - -(defun read-keyword (chr) - "Reads a sequence of alphabetic characters. Throws `invalid-immediate-or-keyword' -error if a digit is encountered." - ;; may be combined with read-immediate-helper - (defun read-keyword-helper (chrs-so-far) - (let ((chr (peek-char nil *standard-input* nil))) - (cond ((and (not (null chr)) (alpha-char-p chr)) - (read-keyword-helper (cons (read-char *standard-input* nil) chrs-so-far))) - ((and (not (null chr)) (digit-char-p chr)) - (error 'lexer-error - :message - (format nil "LEX failed--encountered ~a while reading keyword.~%" chr))) - (t (reverse chrs-so-far))))) - (coerce (read-keyword-helper (list chr)) 'string)) - -(defun whitespace-char-p (x) - (or (char= #\space x) - (not (graphic-char-p x)))) diff --git a/src/main.lisp b/src/main.lisp index 8a36a66..9180d57 100644 --- a/src/main.lisp +++ b/src/main.lisp @@ -19,13 +19,6 @@ _/_/ _/_/ " (list (clingon:make-option :flag - :description "run the lexer, but stop before parsing" - :long-name "lex" - :short-name #\l - :required nil - :key :lex) - (clingon:make-option - :flag :description "run the parser, but stop before emission" :long-name "parse" :short-name #\p @@ -37,24 +30,18 @@ _/_/ _/_/ " (print-splash) (let* ((args (clingon:command-arguments cmd)) (file (car args)) - (parse? (not (clingon:getopt cmd :lex))) - (emit? (not (clingon:getopt cmd :parse)))) + (emit? (not (clingon:getopt cmd :parse)))) (cond ;; complain about num arguments ((/= (length args) 1) (error "Wrong number of arguments.~%")) ((not (util:asm-extension? file)) (error "The file is not an asm source code file.~%")) - (t (let ((tokens (lex:file->tokens file))) - (if tokens - (progn (pprint tokens) + (t (let ((str (uiop:read-file-string file))) + (if str + (progn (pprint (esrap:parse 'parse:text (string-upcase str))) (terpri) (format t "---~%")) (error "The file does not exist, or it could not be opened.~%")) - (if parse? - (let ((tokens (parse:tokens->ast tokens))) - (progn (pprint tokens) - (terpri) - (format t "---~%")))) (format t "Nitimur in Vetitum~%")))))) diff --git a/src/package.lisp b/src/package.lisp index b856e38..2b01d15 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -10,18 +10,10 @@ #:iota #:riffle)) -(defpackage #:lex - (:use #:cl) - (:export #:lexer-error - #:file->tokens - ;; exported for testing only - #:read-token)) - (defpackage #:parse (:use #:cl) - (:export #:parser-error - #:tokens->ast - ;; exported for testing only + (:export ;; exported for testing only + #:text #:register #:instr )) diff --git a/src/parse.lisp b/src/parse.lisp index 54b0709..a92eae7 100644 --- a/src/parse.lisp +++ b/src/parse.lisp @@ -1,22 +1,24 @@ (in-package #:parse) (esrap:defrule space - (+ (or #\Space #\Tab)) + (+ (or #\space #\tab)) (:constant nil)) +(esrap:defrule newline + (+ #\newline)) + ;;; defines rules to parse an integer in various bases -(esrap:defrule binary (and #\0 #\b (+ (or "0" "1"))) +(esrap:defrule binary (and #\0 #\B (+ (or "0" "1"))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 2))) -(esrap:defrule octal (and #\0 #\o (+ (or (esrap:character-ranges (#\0 #\7))))) +(esrap:defrule octal (and #\0 #\O (+ (or (esrap:character-ranges (#\0 #\7))))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 8))) (esrap:defrule decimal (+ (or (esrap:character-ranges (#\0 #\9)))) (:lambda (list) (parse-integer (esrap:text list) :radix 10))) -(esrap:defrule hex (and #\0 #\x (+ (or (esrap:character-ranges (#\0 #\9)) - "a" "b" "c" "d" "e" "f" +(esrap:defrule hex (and #\0 #\X (+ (or (esrap:character-ranges (#\0 #\9)) "A" "B" "C" "D" "E" "F"))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 16))) @@ -30,7 +32,10 @@ (esrap:defrule dereference (and (esrap:? (or #\+ #\-)) int #\( register #\)) (:destructure (s i1 w1 r w2) (declare (ignore w1 w2)) - (list r (if (and s (string= s "-")) (- i1) i1)))) + (list r (list 'imm (if (and s (string= s "-")) (- i1) i1))))) + +(esrap:defrule immediate int + (:lambda (i) (list 'imm i))) ;;; defines rules to parse labels @@ -56,6 +61,11 @@ (esrap:defrule j-type-3-m (or "PUSH" "POP")) (defmacro defrule-instr (name type-id order &rest destructure-pattern) + "Defines the boilerplate for a common esrap instruction rule. +NAME is the name of the non-terminal symbol. +TYPE-ID is the symbol which appears as the first element of a successful parse. +ORDER is the order to place the parsed tokens in the resulting list. +DESTRUCTURE-PATTERN is the list of non-terminals on the right side of the grammar rule." (let* ((pattern-size (length destructure-pattern)) (spaces (mapcar (lambda (x) (read-from-string (format nil "w~A" x))) (util:iota pattern-size))) (vars (mapcar (lambda (x) (read-from-string (format nil "s~A" x))) (util:iota pattern-size)))) @@ -68,40 +78,41 @@ (defrule-instr r-type-1 'r (1 2 0) register register register) (defrule-instr r-type-2 'r (1 2 0) register register) (defrule-instr r-type-3 'r (0 1 2) register register) -;;(defrule-instr j-type-1 'j (0 1) dereference) + +(esrap:defrule i-type-1 (and i-type-1-m space register space dereference) + (:destructure (m w1 s w2 di) + (declare (ignore w1 w2)) + `(i ,m ,s ,@di))) + +(esrap:defrule i-type-2 (and i-type-2-m space register space dereference) + (:destructure (m w1 s w2 di) + (declare (ignore w1 w2)) + `(i ,m ,@(util:insert-in-middle di s)))) + +(defrule-instr i-type-3 'i (0 1 2) register register immediate) +(esrap:defrule j-type-1 (and j-type-1-m space dereference) + (:destructure (m w di) + (declare (ignore w)) + `(j ,m ,@di))) + (defrule-instr j-type-2 'j (1 0) label) -(defrule-instr j-type-3 'j (0 1) register) - -;; (esrap:defrule i-type-1 (and i-type-1-m space register space dereference) -;; (:destructure (m w1 s w2 di) -;; (declare (ignore w1 w2)) -;; `(i ,m ,s ,@di))) -;; (esrap:defrule i-type-2 (and i-type-2-m space register space dereference) -;; (:destructure (m w1 s w2 di) -;; (declare (ignore w1 w2)) -;; `(i ,m ,@(util:insert-in-middle di s)))) -;; (esrap:defrule i-type-3 (and i-type-3-m space register space register space int) -;; (:destructure (m w1 s1 w2 s2 w3 i) -;; (declare (ignore w1 w2 w3)) -;; (list i m s1 s2 i))) -;; (esrap:defrule j-type-1 (and j-type-1-m space dereference) -;; (:destructure (m w di) -;; (declare (ignore w)) -;; `(j ,m ,@di))) -;; (esrap:defrule j-type-2 (and j-type-2-m space label) -;; (:destructure (m w label) -;; (declare (ignore w)) -;; (list j m "00000" label))) -;; (esrap:defrule j-type-3 (and j-type-3-m space register) -;; (:destructure (m w r) -;; (declare (ignore w)) -;; (list j m r "00000"))) - -(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 j-type-2 j-type-3)) - - -;; (esrap:parse 'register "$3") - -;; (esrap:parse 'j-type-1 "JMP 3($3)") -;; (esrap:parse 'j-type-2 "JRL FOO") -;; (esrap:parse 'j-type-3 "PUSH $1") +(esrap:defrule j-type-3 (and j-type-3-m space register) + (:destructure (m w r) + (declare (ignore w)) + `(j ,m ,r (imm 0)))) + +(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 i-type-1 i-type-2 + i-type-3 j-type-1 j-type-2 j-type-3 label-decl)) + +;;; defines rules to parse the .text segment + +(esrap:defrule instr-clean (and (esrap:? space) instr newline) + (:destructure (w1 i w2) + (declare (ignore nl l)) + i)) + +(esrap:defrule text (and ".TEXT" newline + (* instr-clean)) + (:destructure (txt nl is) + (declare (ignore txt nl)) + (list 'text is))) |