summaryrefslogtreecommitdiff
path: root/src/parse.lisp
diff options
context:
space:
mode:
authorSiddarth Suresh <155843085+SiddarthSuresh98@users.noreply.github.com>2025-04-10 18:37:40 -0400
committerGitHub <noreply@github.com>2025-04-10 18:37:40 -0400
commit5dbf0b63988b42c112ca0087cbbbb090566df5c1 (patch)
treece8ee03b9d25b739a2e687c69b14d9221420e4fa /src/parse.lisp
parentcc1e5892a25949b996d69a0b07f151a276ef2570 (diff)
parentdf508744ec2975cec0ba05e8a4358c1c41265c4c (diff)
Merge pull request #2 from bdunahu/bdunahu
Finish parser, add .text and .data directives, variable names, label processing
Diffstat (limited to 'src/parse.lisp')
-rw-r--r--src/parse.lisp229
1 files changed, 174 insertions, 55 deletions
diff --git a/src/parse.lisp b/src/parse.lisp
index 3052583..d971444 100644
--- a/src/parse.lisp
+++ b/src/parse.lisp
@@ -1,55 +1,174 @@
-helper(in-package #:parse)
-
-(define-condition parser-error (error)
- ((message :initarg :message
- :initform nil
- :reader message))
- (:report (lambda (condition stream)
- (format stream "~A" (message condition))))
- (:documentation "Dedicated error for an invalid parse."))
-
-(defun tokens->ast (program)
- "Given PROGRAM, which is a list of lists of symbols,
-filters out the labels and parses."
- ;; TODO add directives
- (let ((program (remove nil (mapcar #'extract-label program)))
- (i 0))
- (mapcar (lambda (l) (extract-instruction l i)) program)))
-
-(let ((i 0))
- (defun extract-label (line)
- "Given a series of tokens LINE, determines if LINE is
-in the form STRING {colon}. If it is, then it is treated as a
-label, and pushed onto the stack with the line index.
-
-Note that this function is intended to be called using mapcar,
-so that labels can be added to a map and otherwise removed from
-processing."
- (trivia:match line
- ((list (and id (type string))
- (satisfies (lambda (x) (equal x 'lex::colon))))
- (progn (push (cons (read-from-string id) i) util:label-loc) nil))
- (_ (progn (incf i) line)))))
-
-(defun extract-instruction (line i)
- "Given instruction LINE, determines the expected type format and passes
-LINE and the index I to the the respective function."
- ;; TODO add pseudo-ops (i.e., nop, mov, ...)
- (let* ((type-map '((r-type . extract-r-type)
- (i-type . extract-i-type)
- (j-type . extract-j-type)))
- (keyword (car line))
- (type-fn (cdr (assoc keyword type-map))))
- (if type-fn
- (funcall type-fn line i)
- (error 'parser-error
- (format nil "PARSE failed--~a is not a known keyword.~%" (keyword))))))
-
-(defun extract-r-type (line i)
- 'r)
-
-(defun extract-i-type (line i)
- 'i)
-
-(defun extract-j-type (line i)
- 'j)
+(in-package #:parse)
+
+(defparameter line-number 0
+ "The number of real instructions processed up until this point.")
+(defparameter var-offset 0
+ "The number of variables processed up until this point.")
+
+(esrap:defrule space
+ (+ (or #\space #\tab))
+ (:constant nil))
+
+(esrap:defrule nl (+ #\newline)
+ (:constant nil))
+
+(esrap:defrule nl-inc (+ #\newline)
+ (:lambda (n)
+ (declare (ignore n))
+ (incf line-number)
+ nil))
+
+(esrap:defrule alpha (+ (alphanumericp character))
+ (:text t))
+
+;;; defines rules to parse an integer in various bases
+
+(defmacro define-number-rule ())
+
+(esrap:defrule binary (and #\0 #\B (+ (or "0" "1")))
+ (:lambda (e) (parse-integer (esrap:text (cddr e)) :radix 2)))
+
+(esrap:defrule octal (and #\0 #\O (+ (or (esrap:character-ranges (#\0 #\7)))))
+ (:lambda (e) (parse-integer (esrap:text (cddr e)) :radix 8)))
+
+(esrap:defrule decimal (+ (or (esrap:character-ranges (#\0 #\9))))
+ (:lambda (e) (parse-integer (esrap:text e) :radix 10)))
+
+(esrap:defrule hex (and #\0 #\X (+ (or (esrap:character-ranges (#\0 #\9))
+ "A" "B" "C" "D" "E" "F")))
+ (:lambda (e) (parse-integer (esrap:text (cddr e)) :radix 16)))
+
+(esrap:defrule int (or binary octal hex decimal))
+
+;;; defines rules to parse an operand
+
+(esrap:defrule register (and #\$ int)
+ (:function cadr)
+ (:lambda (e) (list 'emit::rr e)))
+
+(esrap:defrule var alpha
+ (:lambda (e) (list (list 'emit::rr 0) (list 'emit::var e))))
+
+(esrap:defrule dereference (and (esrap:? (or #\+ #\-)) int #\( register #\))
+ (:destructure (s i1 w1 r w2)
+ (declare (ignore w1 w2))
+ (list r (list 'emit::imm (if (and s (string= s "-")) (- i1) i1)))))
+
+(esrap:defrule immediate int
+ (:lambda (e) (list 'emit::imm e)))
+
+;;; defines rules to parse labels
+
+(esrap:defrule label alpha
+ (:lambda (e) (list 'emit::l e line-number)))
+
+(esrap:defrule label-decl (and alpha #\:)
+ (:function car)
+ (:lambda (e)
+ (util:add-label e line-number)
+ nil))
+
+;;; defines rules to parse instruction types
+
+(defun generate-mnemonic (name ops)
+ (let ((expr `(or ,@ops)))
+ (esrap:add-rule
+ name (make-instance 'esrap:rule :expression expr))))
+
+;; define special cases first
+(generate-mnemonic 'r-type-1-m '("NOT"))
+(generate-mnemonic 'r-type-2-m '("CMP" "CEV"))
+(generate-mnemonic 'i-type-1-m '("LOADV" "LOAD"))
+(generate-mnemonic 'i-type-2-m '("STOREV" "STORE"))
+(generate-mnemonic 'j-type-1-m '("JMP" "JAL"))
+(generate-mnemonic 'j-type-2-m '("PUSH" "POP"))
+
+;; we need to reverse to ensure rules like "ADDV" are matched before "ADD"
+(generate-mnemonic 'r-type-3-m (reverse util:r-type))
+(generate-mnemonic 'i-type-3-m (reverse util:i-type))
+(generate-mnemonic 'j-type-3-m (reverse util:j-type))
+
+;; TODO this is pretty gross
+(defmacro defrule-instr (name type-id order &rest destructure-pattern)
+ "Defines the boilerplate for a common esrap instruction rule.
+NAME is the name of the non-terminal symbol.
+TYPE-ID is the symbol which appears as the first element of a successful parse.
+ORDER is the order to place the parsed tokens in the resulting list.
+DESTRUCTURE-PATTERN is the list of non-terminals on the right side of the grammar rule."
+ (let* ((pattern-size (length destructure-pattern))
+ (spaces (mapcar (lambda (x) (read-from-string (format nil "w~A" x))) (util:iota pattern-size)))
+ (vars (mapcar (lambda (x) (read-from-string (format nil "s~A" x))) (util:iota pattern-size))))
+ `(esrap:defrule ,name
+ (and ,(read-from-string (format nil "~A-m" name)) ,@(util:riffle (make-list pattern-size :initial-element 'space) destructure-pattern))
+ (:destructure (m ,@(util:riffle spaces vars))
+ (declare (ignore ,@spaces))
+ (list ,type-id m ,@(mapcar (lambda (x) (or (nth x vars) ''(emit::rr 0))) order))))))
+
+(defrule-instr r-type-1 'emit::r (1 2 0) register register)
+(defrule-instr r-type-2 'emit::r (0 1 2) register register)
+(defrule-instr r-type-3 'emit::r (1 2 0) register register register)
+(defrule-instr i-type-3 'emit::i (0 1 2) register register immediate)
+(defrule-instr j-type-3 'emit::j (1 0) label)
+
+(esrap:defrule i-type-1 (and i-type-1-m space register space (or dereference var))
+ (:destructure (m w1 s w2 di)
+ (declare (ignore w1 w2))
+ `(emit::i ,m ,s ,@di)))
+
+(esrap:defrule i-type-2 (and i-type-2-m space register space (or dereference var))
+ (:destructure (m w1 s w2 di)
+ (declare (ignore w1 w2))
+ `(emit::i ,m ,@(util:insert-in-middle di s))))
+
+(esrap:defrule j-type-1 (and j-type-1-m space dereference)
+ (:destructure (m w di)
+ (declare (ignore w))
+ `(emit::j ,m ,@di)))
+
+(esrap:defrule j-type-2 (and j-type-2-m space register)
+ (:destructure (m w r)
+ (declare (ignore w))
+ `(emit::j ,m ,r (emit::imm 0))))
+
+(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 i-type-1 i-type-2
+ i-type-3 j-type-1 j-type-2 j-type-3))
+
+;;; defines rules to parse the .text segment
+
+(esrap:defrule instr-clean (and (esrap:? space) instr (esrap:? space) nl-inc)
+ (:function cadr))
+
+(esrap:defrule label-clean (and label-decl (esrap:? space) nl)
+ (:function car))
+
+(esrap:defrule text-line (or instr-clean label-clean))
+
+(esrap:defrule text (and ".TEXT" (esrap:? space) nl (* text-line))
+ (:function cadddr)
+ (:lambda (e) `(emit::x ,@(remove nil e))))
+
+;;; defines rules to parse the .data segment
+
+(esrap:defrule data-word (and (esrap:? space) int)
+ (:function cadr)
+ (:lambda (e)
+ (incf var-offset)
+ e))
+
+(esrap:defrule var-decl alpha
+ (:lambda (e)
+ (util:add-variable e var-offset)
+ nil))
+
+(esrap:defrule data-line (and (esrap:? space) var-decl (+ data-word) (esrap:? space) nl)
+ (:function caddr))
+
+(esrap:defrule data (and ".DATA" (esrap:? space) nl (* data-line))
+ (:function cadddr)
+ (:lambda (e) `(emit::d ,@(apply #'append e))))
+
+;;; defines rules to parse a program
+
+(esrap:defrule str->ast (and (* (or space nl)) data text)
+ (:function cdr)
+ (:lambda (e) `(emit::p ,@e)))