summaryrefslogtreecommitdiff
path: root/src/parse.lisp
blob: f9ede2055d06228c14808384aba9097aa524ff2a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
(in-package #:parse)

(defparameter line-number 0)

(esrap:defrule space
    (+ (or #\space #\tab))
  (:constant nil))

(esrap:defrule newline
    (+ #\newline)
  (:destructure (n)
    (declare (ignore n))
    (incf line-number)
    nil))

;;; defines rules to parse an integer in various bases

(defmacro define-number-rule ())

(esrap:defrule binary (and #\0 #\B (+ (or "0" "1")))
  (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 2)))

(esrap:defrule octal (and #\0 #\O (+ (or (esrap:character-ranges (#\0 #\7)))))
  (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 8)))

(esrap:defrule decimal (+ (or (esrap:character-ranges (#\0 #\9))))
  (:lambda (list) (parse-integer (esrap:text list) :radix 10)))

(esrap:defrule hex (and #\0 #\X (+ (or (esrap:character-ranges (#\0 #\9))
                                       "A" "B" "C" "D" "E" "F")))
  (:lambda (list) (parse-integer (esrap:text (cddr list)) :radix 16)))

(esrap:defrule int (or binary octal hex decimal))

;;; defines rules to parse an operand

(esrap:defrule register (and #\$ (or int reg-id))
  (:function cadr)
  (:lambda (id) (list 'rr id)))

(esrap:defrule dereference (and (esrap:? (or #\+ #\-)) int #\( register #\))
  (:destructure (s i1 w1 r w2)
    (declare (ignore w1 w2))
    (list r (list 'imm (if (and s (string= s "-")) (- i1) i1)))))

(esrap:defrule immediate int
  (:lambda (i) (list 'imm i)))

;;; defines rules to parse labels

(esrap:defrule label (+ (alphanumericp character))
  (:lambda (list) (list 'l (esrap:text list))))

(esrap:defrule label-decl (and label #\:)
  (:function car)
  (:lambda (l)
    (util:add-label l line-number)
    ;; this line isn't in the final program
    (decf line-number)
    nil))

;;; defines rules to parse instruction types

(defun generate-mnemonic (name ops)
  (let ((expr `(or ,@ops)))
    (esrap:add-rule
     name (make-instance 'esrap:rule :expression expr))))

;; define special cases first
(generate-mnemonic 'r-type-1-m '("NOT"))
(generate-mnemonic 'r-type-2-m '("CMP" "CEV"))
(generate-mnemonic 'i-type-1-m '("LOADV" "LOAD"))
(generate-mnemonic 'i-type-2-m '("STOREV" "STORE"))
(generate-mnemonic 'j-type-1-m '("JMP" "JAL"))
(generate-mnemonic 'j-type-2-m '("PUSH" "POP"))

;; we need to reverse to ensure rules like "ADDV" are matched before "ADD"
(generate-mnemonic 'r-type-3-m (reverse util:r-type))
(generate-mnemonic 'i-type-3-m (reverse util:i-type))
(generate-mnemonic 'j-type-3-m (reverse util:j-type))

(defmacro defrule-instr (name type-id order &rest destructure-pattern)
  "Defines the boilerplate for a common esrap instruction rule.
NAME is the name of the non-terminal symbol.
TYPE-ID is the symbol which appears as the first element of a successful parse.
ORDER is the order to place the parsed tokens in the resulting list.
DESTRUCTURE-PATTERN is the list of non-terminals on the right side of the grammar rule."
  (let* ((pattern-size (length destructure-pattern))
         (spaces (mapcar (lambda (x) (read-from-string (format nil "w~A" x))) (util:iota pattern-size)))
         (vars (mapcar (lambda (x) (read-from-string (format nil "s~A" x))) (util:iota pattern-size))))
    `(esrap:defrule ,name
         (and ,(read-from-string (format nil "~A-m" name)) ,@(util:riffle (make-list pattern-size :initial-element 'space) destructure-pattern))
       (:destructure (m ,@(util:riffle spaces vars))
         (declare (ignore ,@spaces))
         (list ,type-id m ,@(mapcar (lambda (x) (or (nth x vars) ''(rr 0))) order))))))

(defrule-instr r-type-1 'r (1 2 0) register register)
(defrule-instr r-type-2 'r (0 1 2) register register)
(defrule-instr r-type-3 'r (1 2 0) register register register)
(defrule-instr i-type-3 'i (0 1 2) register register immediate)
(defrule-instr j-type-3 'j (1 0) label)

(esrap:defrule i-type-1 (and i-type-1-m space register space dereference)
  (:destructure (m w1 s w2 di)
    (declare (ignore w1 w2))
    `(i ,m ,s ,@di)))

(esrap:defrule i-type-2 (and i-type-2-m space register space dereference)
  (:destructure (m w1 s w2 di)
    (declare (ignore w1 w2))
    `(i ,m ,@(util:insert-in-middle di s))))

(esrap:defrule j-type-1 (and j-type-1-m space dereference)
  (:destructure (m w di)
    (declare (ignore w))
    `(j ,m ,@di)))

(esrap:defrule j-type-2 (and j-type-2-m space register)
  (:destructure (m w r)
    (declare (ignore w))
    `(j ,m ,r (imm 0))))

(esrap:defrule instr (or r-type-1 r-type-2 r-type-3 i-type-1 i-type-2
                         i-type-3 j-type-1 j-type-2 j-type-3 label-decl))

;;; defines rules to parse the .text segment

(esrap:defrule instr-clean (and (esrap:? space) instr newline)
  (:function cadr))

(esrap:defrule text (and ".TEXT" newline
                         (* instr-clean))
  (:function caddr)
  (:lambda (instr)
    `(text ,@(remove nil instr))))

;;; defines rules to parse the .data segment