summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorbd <bdunahu@operationnull.com>2025-04-09 20:25:57 -0400
committerbd <bdunahu@operationnull.com>2025-04-09 20:25:57 -0400
commitce80845b5cb5b473327e4e561342490576d83a90 (patch)
tree90f7189982384c2e4de2f58a3f2daac4adf80514 /src
parent6ba1871c3825e17d33b96ffd3051239dfe18d61a (diff)
Successfully parse every instruction type
Diffstat (limited to 'src')
-rw-r--r--src/lex.lisp110
-rw-r--r--src/main.lisp21
-rw-r--r--src/package.lisp12
-rw-r--r--src/parse.lisp95
4 files changed, 59 insertions, 179 deletions
diff --git a/src/lex.lisp b/src/lex.lisp
deleted file mode 100644
index c86d17c..0000000
--- a/src/lex.lisp
+++ /dev/null
@@ -1,110 +0,0 @@
-(in-package #:lex)
-
-(define-condition lexer-error (error)
- ((message :initarg :message
- :initform nil
- :reader message))
- (:report (lambda (condition stream)
- (format stream "~A" (message condition))))
- (:documentation "Dedicated error for an invalid lex."))
-
-(defun file->tokens (file)
- "Opens FILE and parses returns a list of tokens, or
-NIL if the file could not be opened."
-
- (defun read-instr (lst tokens-so-far)
- "Collects tokens in FILE into TOKENS-SO-FAR, splitting on a newline."
- (let ((token (read-token)))
- (cond ((null token) (reverse tokens-so-far))
- ((eq token 'nl)
- (cons (reverse tokens-so-far) (read-instr nil nil)))
- (t (read-instr lst (cons token tokens-so-far))))))
-
- (and (probe-file file)
- (with-open-file (*standard-input* file :direction :input)
- (remove nil (read-instr '() '())))))
-
-(defun read-token ()
- "Reads *STANDARD-INPUT* and returns a token, or nil if the end
-of file has been reached.
-Whitespace, commas, colons, and parentheses are token delimiters.
-Comments start with a semi-colon ';' and all tokens after are ignored."
- (let ((chr (read-char *standard-input* nil)))
- (cond
- ((null chr) chr)
-
- ((char= chr #\linefeed) 'nl)
-
- ((whitespace-char-p chr)
- (read-token))
-
- ((char= chr #\;)
- (progn (read-line *standard-input* nil)
- 'nl))
-
- ((char= chr #\.) 'period)
-
- ((char= chr #\() 'left-paren)
- ((char= chr #\)) 'right-paren)
-
- ((char= chr #\:) 'colon)
- ((char= chr #\$) 'dollar)
-
- ((char= chr #\+) 'plus)
- ((char= chr #\-) 'minus)
-
- ((digit-char-p chr)
- (read-immediate chr))
-
- ((alpha-char-p chr)
- (read-keyword chr))
-
- (t (error 'lexer-error
- :message
- (format nil "LEX failled--~a is not a valid lexical symbol.~%" chr))))))
-
-(defun read-immediate (chr)
- "Reads a sequence of digits, in base 2, 8, 10, or 16.. Throws
-`invalid-immediate-or-keyword' error if an alphabetic character is encountered."
- ;; may be combined with read-keyword-helper
- (defun read-immediate-helper (chrs-so-far)
- (let ((chr (peek-char nil *standard-input* nil)))
- (cond ((and (not (null chr)) (digit-char-p chr))
- (read-immediate-helper (cons (read-char *standard-input* nil) chrs-so-far)))
- ((and (not (null chr)) (alpha-char-p chr))
- (error 'lexer-error
- :message
- (format nil "LEX failed--encountered ~a while reading immediate.~%" chr)))
- (t (reverse chrs-so-far)))))
-
- (let* ((next (peek-char nil *standard-input* nil))
- (radix (cond ((null next) 10)
- ((char= next #\b) 2)
- ((char= next #\o) 8)
- ((char= next #\x) 16)
- ((alpha-char-p next) nil)
- (t 10)))
- (arg (list chr)))
- (when (and (char= chr #\0) radix (not (= radix 10)))
- (read-char *standard-input* nil)
- (setq arg '()))
- (parse-integer (coerce (read-immediate-helper arg) 'string) :radix radix)))
-
-(defun read-keyword (chr)
- "Reads a sequence of alphabetic characters. Throws `invalid-immediate-or-keyword'
-error if a digit is encountered."
- ;; may be combined with read-immediate-helper
- (defun read-keyword-helper (chrs-so-far)
- (let ((chr (peek-char nil *standard-input* nil)))
- (cond ((and (not (null chr)) (alpha-char-p chr))
- (read-keyword-helper (cons (read-char *standard-input* nil) chrs-so-far)))
- ((and (not (null chr)) (digit-char-p chr))
- (error 'lexer-error
- :message
- (format nil "LEX failed--encountered ~a while reading keyword.~%" chr)))
- (t (reverse chrs-so-far)))))
- (coerce (read-keyword-helper (list chr)) 'string))
-
-(defun whitespace-char-p (x)
- (or (char= #\space x)
- (not (graphic-char-p x))))
diff --git a/src/main.lisp b/src/main.lisp
index 8a36a66..9180d57 100644
--- a/src/main.lisp
+++ b/src/main.lisp
@@ -19,13 +19,6 @@ _/_/ _/_/ "
(list
(clingon:make-option
:flag
- :description "run the lexer, but stop before parsing"
- :long-name "lex"
- :short-name #\l
- :required nil
- :key :lex)
- (clingon:make-option
- :flag
:description "run the parser, but stop before emission"
:long-name "parse"
:short-name #\p
@@ -37,24 +30,18 @@ _/_/ _/_/ "
(print-splash)
(let* ((args (clingon:command-arguments cmd))
(file (car args))
- (parse? (not (clingon:getopt cmd :lex)))
- (emit? (not (clingon:getopt cmd :parse))))
+ (emit? (not (clingon:getopt cmd :parse))))
(cond
;; complain about num arguments
((/= (length args) 1) (error "Wrong number of arguments.~%"))
((not (util:asm-extension? file))
(error "The file is not an asm source code file.~%"))
- (t (let ((tokens (lex:file->tokens file)))
- (if tokens
- (progn (pprint tokens)
+ (t (let ((str (uiop:read-file-string file)))
+ (if str
+ (progn (pprint (esrap:parse 'parse:text (string-upcase str)))
(terpri)
(format t "---~%"))
(error "The file does not exist, or it could not be opened.~%"))
- (if parse?
- (let ((tokens (parse:tokens->ast tokens)))
- (progn (pprint tokens)
- (terpri)
- (format t "---~%"))))
(format t "Nitimur in Vetitum~%"))))))
diff --git a/src/package.lisp b/src/package.lisp
index b856e38..2b01d15 100644
--- a/src/package.lisp
+++ b/src/package.lisp
@@ -10,18 +10,10 @@
#:iota
#:riffle))
-(defpackage #:lex
- (:use #:cl)
- (:export #:lexer-error
- #:file->tokens
- ;; exported for testing only
- #:read-token))
-
(defpackage #:parse
(:use #:cl)
- (:export #:parser-error
- #:tokens->ast
- ;; exported for testing only
+ (:export ;; exported for testing only
+ #:text
#:register
#:instr
))
diff --git a/src/parse.lisp b/src/parse.lisp
index 54b0709..a92eae7 100644
--- a/src/parse.lisp
+++ b/src/parse.lisp
@@ -1,22 +1,24 @@
(in-package #:parse)
(esrap:defrule space
- (+ (or #\Space #\Tab))
+ (+ (or #\space #\tab))
(:constant nil))
+(esrap:defrule newline
+ (+ #\newline))
+
;;; defines rules to parse an integer in various bases
-(esrap:defrule binary (and #\0 #\b (+ (or "0" "1")))
+(esrap:defrule binary (and #\0 #\B (+ (or "0" "1")))
(:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 2)))
-(esrap:defrule octal (and #\0 #\o (+ (or (esrap:character-ranges (#\0 #\7)))))
+(esrap:defrule octal (and #\0 #\O (+ (or (esrap:character-ranges (#\0 #\7)))))
(:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 8)))
(esrap:defrule decimal (+ (or (esrap:character-ranges (#\0 #\9))))
(:lambda (list) (parse-integer (esrap:text list) :radix 10)))
-(esrap:defrule hex (and #\0 #\x (+ (or (esrap:character-ranges (#\0 #\9))
- "a" "b" "c" "d" "e" "f"
+(esrap:defrule hex (and #\0 #\X (+ (or (esrap:character-ranges (#\0 #\9))
"A" "B" "C" "D" "E" "F")))
(:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 16)))
@@ -30,7 +32,10 @@
(esrap:defrule dereference (and (esrap:? (or #\+ #\-)) int #\( register #\))
(:destructure (s i1 w1 r w2)
(declare (ignore w1 w2))
- (list r (if (and s (string= s "-")) (- i1) i1))))
+ (list r (list 'imm (if (and s (string= s "-")) (- i1) i1)))))
+
+(esrap:defrule immediate int
+ (:lambda (i) (list 'imm i)))
;;; defines rules to parse labels
@@ -56,6 +61,11 @@
(esrap:defrule j-type-3-m (or "PUSH" "POP"))
(defmacro defrule-instr (name type-id order &rest destructure-pattern)
+ "Defines the boilerplate for a common esrap instruction rule.
+NAME is the name of the non-terminal symbol.
+TYPE-ID is the symbol which appears as the first element of a successful parse.
+ORDER is the order to place the parsed tokens in the resulting list.
+DESTRUCTURE-PATTERN is the list of non-terminals on the right side of the grammar rule."
(let* ((pattern-size (length destructure-pattern))
(spaces (mapcar (lambda (x) (read-from-string (format nil "w~A" x))) (util:iota pattern-size)))
(vars (mapcar (lambda (x) (read-from-string (format nil "s~A" x))) (util:iota pattern-size))))
@@ -68,40 +78,41 @@
(defrule-instr r-type-1 'r (1 2 0) register register register)
(defrule-instr r-type-2 'r (1 2 0) register register)
(defrule-instr r-type-3 'r (0 1 2) register register)
-;;(defrule-instr j-type-1 'j (0 1) dereference)
+
+(esrap:defrule i-type-1 (and i-type-1-m space register space dereference)
+ (:destructure (m w1 s w2 di)
+ (declare (ignore w1 w2))
+ `(i ,m ,s ,@di)))
+
+(esrap:defrule i-type-2 (and i-type-2-m space register space dereference)
+ (:destructure (m w1 s w2 di)
+ (declare (ignore w1 w2))
+ `(i ,m ,@(util:insert-in-middle di s))))
+
+(defrule-instr i-type-3 'i (0 1 2) register register immediate)
+(esrap:defrule j-type-1 (and j-type-1-m space dereference)
+ (:destructure (m w di)
+ (declare (ignore w))
+ `(j ,m ,@di)))
+
(defrule-instr j-type-2 'j (1 0) label)
-(defrule-instr j-type-3 'j (0 1) register)
-
-;; (esrap:defrule i-type-1 (and i-type-1-m space register space dereference)
-;; (:destructure (m w1 s w2 di)
-;; (declare (ignore w1 w2))
-;; `(i ,m ,s ,@di)))
-;; (esrap:defrule i-type-2 (and i-type-2-m space register space dereference)
-;; (:destructure (m w1 s w2 di)
-;; (declare (ignore w1 w2))
-;; `(i ,m ,@(util:insert-in-middle di s))))
-;; (esrap:defrule i-type-3 (and i-type-3-m space register space register space int)
-;; (:destructure (m w1 s1 w2 s2 w3 i)
-;; (declare (ignore w1 w2 w3))
-;; (list i m s1 s2 i)))
-;; (esrap:defrule j-type-1 (and j-type-1-m space dereference)
-;; (:destructure (m w di)
-;; (declare (ignore w))
-;; `(j ,m ,@di)))
-;; (esrap:defrule j-type-2 (and j-type-2-m space label)
-;; (:destructure (m w label)
-;; (declare (ignore w))
-;; (list j m "00000" label)))
-;; (esrap:defrule j-type-3 (and j-type-3-m space register)
-;; (:destructure (m w r)
-;; (declare (ignore w))
-;; (list j m r "00000")))
-
-(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 j-type-2 j-type-3))
-
-
-;; (esrap:parse 'register "$3")
-
-;; (esrap:parse 'j-type-1 "JMP 3($3)")
-;; (esrap:parse 'j-type-2 "JRL FOO")
-;; (esrap:parse 'j-type-3 "PUSH $1")
+(esrap:defrule j-type-3 (and j-type-3-m space register)
+ (:destructure (m w r)
+ (declare (ignore w))
+ `(j ,m ,r (imm 0))))
+
+(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 i-type-1 i-type-2
+ i-type-3 j-type-1 j-type-2 j-type-3 label-decl))
+
+;;; defines rules to parse the .text segment
+
+(esrap:defrule instr-clean (and (esrap:? space) instr newline)
+ (:destructure (w1 i w2)
+ (declare (ignore nl l))
+ i))
+
+(esrap:defrule text (and ".TEXT" newline
+ (* instr-clean))
+ (:destructure (txt nl is)
+ (declare (ignore txt nl))
+ (list 'text is)))