From ce80845b5cb5b473327e4e561342490576d83a90 Mon Sep 17 00:00:00 2001 From: bd Date: Wed, 9 Apr 2025 20:25:57 -0400 Subject: Successfully parse every instruction type --- src/parse.lisp | 95 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 53 insertions(+), 42 deletions(-) (limited to 'src/parse.lisp') diff --git a/src/parse.lisp b/src/parse.lisp index 54b0709..a92eae7 100644 --- a/src/parse.lisp +++ b/src/parse.lisp @@ -1,22 +1,24 @@ (in-package #:parse) (esrap:defrule space - (+ (or #\Space #\Tab)) + (+ (or #\space #\tab)) (:constant nil)) +(esrap:defrule newline + (+ #\newline)) + ;;; defines rules to parse an integer in various bases -(esrap:defrule binary (and #\0 #\b (+ (or "0" "1"))) +(esrap:defrule binary (and #\0 #\B (+ (or "0" "1"))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 2))) -(esrap:defrule octal (and #\0 #\o (+ (or (esrap:character-ranges (#\0 #\7))))) +(esrap:defrule octal (and #\0 #\O (+ (or (esrap:character-ranges (#\0 #\7))))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 8))) (esrap:defrule decimal (+ (or (esrap:character-ranges (#\0 #\9)))) (:lambda (list) (parse-integer (esrap:text list) :radix 10))) -(esrap:defrule hex (and #\0 #\x (+ (or (esrap:character-ranges (#\0 #\9)) - "a" "b" "c" "d" "e" "f" +(esrap:defrule hex (and #\0 #\X (+ (or (esrap:character-ranges (#\0 #\9)) "A" "B" "C" "D" "E" "F"))) (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 16))) @@ -30,7 +32,10 @@ (esrap:defrule dereference (and (esrap:? (or #\+ #\-)) int #\( register #\)) (:destructure (s i1 w1 r w2) (declare (ignore w1 w2)) - (list r (if (and s (string= s "-")) (- i1) i1)))) + (list r (list 'imm (if (and s (string= s "-")) (- i1) i1))))) + +(esrap:defrule immediate int + (:lambda (i) (list 'imm i))) ;;; defines rules to parse labels @@ -56,6 +61,11 @@ (esrap:defrule j-type-3-m (or "PUSH" "POP")) (defmacro defrule-instr (name type-id order &rest destructure-pattern) + "Defines the boilerplate for a common esrap instruction rule. +NAME is the name of the non-terminal symbol. +TYPE-ID is the symbol which appears as the first element of a successful parse. +ORDER is the order to place the parsed tokens in the resulting list. +DESTRUCTURE-PATTERN is the list of non-terminals on the right side of the grammar rule." (let* ((pattern-size (length destructure-pattern)) (spaces (mapcar (lambda (x) (read-from-string (format nil "w~A" x))) (util:iota pattern-size))) (vars (mapcar (lambda (x) (read-from-string (format nil "s~A" x))) (util:iota pattern-size)))) @@ -68,40 +78,41 @@ (defrule-instr r-type-1 'r (1 2 0) register register register) (defrule-instr r-type-2 'r (1 2 0) register register) (defrule-instr r-type-3 'r (0 1 2) register register) -;;(defrule-instr j-type-1 'j (0 1) dereference) + +(esrap:defrule i-type-1 (and i-type-1-m space register space dereference) + (:destructure (m w1 s w2 di) + (declare (ignore w1 w2)) + `(i ,m ,s ,@di))) + +(esrap:defrule i-type-2 (and i-type-2-m space register space dereference) + (:destructure (m w1 s w2 di) + (declare (ignore w1 w2)) + `(i ,m ,@(util:insert-in-middle di s)))) + +(defrule-instr i-type-3 'i (0 1 2) register register immediate) +(esrap:defrule j-type-1 (and j-type-1-m space dereference) + (:destructure (m w di) + (declare (ignore w)) + `(j ,m ,@di))) + (defrule-instr j-type-2 'j (1 0) label) -(defrule-instr j-type-3 'j (0 1) register) - -;; (esrap:defrule i-type-1 (and i-type-1-m space register space dereference) -;; (:destructure (m w1 s w2 di) -;; (declare (ignore w1 w2)) -;; `(i ,m ,s ,@di))) -;; (esrap:defrule i-type-2 (and i-type-2-m space register space dereference) -;; (:destructure (m w1 s w2 di) -;; (declare (ignore w1 w2)) -;; `(i ,m ,@(util:insert-in-middle di s)))) -;; (esrap:defrule i-type-3 (and i-type-3-m space register space register space int) -;; (:destructure (m w1 s1 w2 s2 w3 i) -;; (declare (ignore w1 w2 w3)) -;; (list i m s1 s2 i))) -;; (esrap:defrule j-type-1 (and j-type-1-m space dereference) -;; (:destructure (m w di) -;; (declare (ignore w)) -;; `(j ,m ,@di))) -;; (esrap:defrule j-type-2 (and j-type-2-m space label) -;; (:destructure (m w label) -;; (declare (ignore w)) -;; (list j m "00000" label))) -;; (esrap:defrule j-type-3 (and j-type-3-m space register) -;; (:destructure (m w r) -;; (declare (ignore w)) -;; (list j m r "00000"))) - -(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 j-type-2 j-type-3)) - - -;; (esrap:parse 'register "$3") - -;; (esrap:parse 'j-type-1 "JMP 3($3)") -;; (esrap:parse 'j-type-2 "JRL FOO") -;; (esrap:parse 'j-type-3 "PUSH $1") +(esrap:defrule j-type-3 (and j-type-3-m space register) + (:destructure (m w r) + (declare (ignore w)) + `(j ,m ,r (imm 0)))) + +(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 i-type-1 i-type-2 + i-type-3 j-type-1 j-type-2 j-type-3 label-decl)) + +;;; defines rules to parse the .text segment + +(esrap:defrule instr-clean (and (esrap:? space) instr newline) + (:destructure (w1 i w2) + (declare (ignore nl l)) + i)) + +(esrap:defrule text (and ".TEXT" newline + (* instr-clean)) + (:destructure (txt nl is) + (declare (ignore txt nl)) + (list 'text is))) -- cgit v1.2.3