summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorbd <bdunaisky@umass.edu>2024-11-04 22:28:56 -0500
committerbd <bdunaisky@umass.edu>2024-11-04 22:28:56 -0500
commit4717b3ea91b47c3d04d09cf84b9221ea1f89d9f2 (patch)
tree8e5c3d6a83609cd32dbf663b409485697196f7a5 /src
parentbad998157c95851eb29d3e4f6824a276d7032b12 (diff)
Basic lexer with unit tests
Diffstat (limited to 'src')
-rwxr-xr-xsrc/gscc3
-rw-r--r--src/modules/lexer/lexer.scm78
-rw-r--r--src/unit-tests/lexer/lexer-test.scm103
3 files changed, 158 insertions, 26 deletions
diff --git a/src/gscc b/src/gscc
index 7ee1bf1..8914016 100755
--- a/src/gscc
+++ b/src/gscc
@@ -34,7 +34,8 @@ file on a success."
(define (process file parse? assemble?)
"Driver for lexing, parsing, and assembly generation."
(let ((port (open-input-file file)))
- (read-tokens-from-port port)
+ (set-current-input-port port)
+ (read-tokens)
(close-input-port port)))
(define (postprocess file dest)
diff --git a/src/modules/lexer/lexer.scm b/src/modules/lexer/lexer.scm
index 0661fa6..e07ab67 100644
--- a/src/modules/lexer/lexer.scm
+++ b/src/modules/lexer/lexer.scm
@@ -1,31 +1,59 @@
(define-module (modules lexer lexer)
- #:export (read-tokens-from-port))
+ #:export (read-tokens))
-(define (read-tokens-from-port port)
- ""
- (display (read-char port)))
+(define (read-tokens)
+ "Returns a stream of tokens from the
+current input port."
+ (define (read-tokens-loop tokens-so-far)
+ (let ((token (read-token)))
+ (if token
+ (read-tokens-loop (cons token tokens-so-far))
+ (reverse tokens-so-far))))
+ (read-tokens-loop '()))
(define (read-token)
-
- (define (read-token-loop first-char)
+ (let ((chr (read-char)))
(cond
+ ((eof-object? chr) #f)
+ ((char-whitespace? chr)
+ (read-token))
+
+ ((eq? chr #\()
+ 'left-paren)
+ ((eq? chr #\))
+ 'right-paren)
+ ((eq? chr #\{)
+ 'open-brace)
+ ((eq? chr #\})
+ 'close-brace)
+ ((eq? chr #\;)
+ 'semi-colon)
+
+ ((char-alphabetic? chr)
+ (read-identifier chr))
+
+ ((char-numeric? chr)
+ (read-constant chr))
+
+ (#t (error "illegal lexical syntax")))))
+
+(define (read-constant chr)
+ (define (read-constant-helper chrs-so-far)
+ (let ((chr (peek-char)))
+ (cond ((and (not (eof-object? chr)) (char-numeric? chr))
+ (read-constant-helper (cons (read-char) chrs-so-far)))
+ ((and (not (eof-object? chr)) (char-alphabetic? chr))
+ (error "identifier starting with digit"))
+ (#t (reverse chrs-so-far)))))
+ (string->number (list->string (read-constant-helper (list chr)))))
- ((or (char-whitespace? first-char))
- (read-token)
-
- ((eq? first-char #\()
- (cons 'left-paren #\())
- ((eq? first-char #\))
- (cons 'right-paren #\)))
- ((eq? first-char #\{)
- (cons 'right-bracket #\{))
- ((eq? first-char #\})
- (cons 'right-bracket #\}))
-
- (#t
- (error "illegal lexical syntax")))))
-
- (let ((first-char (read-char)))
- (if (eof-object? first-char)
- '()
- (cons (read-token-loop first-char) '()))))
+(define (read-identifier chr)
+ (define (read-identifier-helper chrs-so-far)
+ (let ((chr (peek-char)))
+ (cond ((and (not (eof-object? chr))
+ (or (char-alphabetic? chr)
+ (char-numeric? chr)
+ (eq? chr #\_)))
+ (read-identifier-helper (cons (read-char) chrs-so-far)))
+ (#t (reverse chrs-so-far)))))
+ (list->string (read-identifier-helper (list chr))))
diff --git a/src/unit-tests/lexer/lexer-test.scm b/src/unit-tests/lexer/lexer-test.scm
new file mode 100644
index 0000000..6596a1f
--- /dev/null
+++ b/src/unit-tests/lexer/lexer-test.scm
@@ -0,0 +1,103 @@
+;; -*- compile-command: "guile -L ./src ./src/unit-tests/lexer/lexer-test.scm"; -*-
+(use-modules (srfi srfi-64)
+ (modules lexer lexer))
+
+(define (read-this str)
+ (set-current-input-port
+ (open-input-string str)))
+
+
+(test-begin "lexer-harness")
+
+
+(test-equal "empty string"
+ '()
+ (begin
+ (read-this "")
+ (read-tokens)))
+
+(test-equal "("
+ '(left-paren)
+ (begin
+ (read-this "(")
+ (read-tokens)))
+
+(test-equal "(("
+ '(left-paren left-paren)
+ (begin
+ (read-this "((")
+ (read-tokens)))
+
+(test-equal "( )"
+ '(left-paren right-paren)
+ (begin
+ (read-this "( )")
+ (read-tokens)))
+
+(test-equal "( {;} {((};})"
+ '(left-paren open-brace semi-colon close-brace open-brace left-paren left-paren close-brace semi-colon close-brace right-paren)
+ (begin
+ (read-this "( {;} {((};})")
+ (read-tokens)))
+
+(test-equal "1"
+ '(1)
+ (begin
+ (read-this "1")
+ (read-tokens)))
+
+(test-equal "0"
+ '(0)
+ (begin
+ (read-this "0")
+ (read-tokens)))
+
+(test-equal "0011001"
+ '(11001)
+ (begin
+ (read-this "0011001")
+ (read-tokens)))
+
+(test-equal "12 {34"
+ '(12 open-brace 34)
+ (begin
+ (read-this "12 {34")
+ (read-tokens)))
+
+(test-equal "34;"
+ '(34 semi-colon)
+ (begin
+ (read-this "34;")
+ (read-tokens)))
+
+
+(test-error "3.4"
+ (begin
+ (read-this "3.4")
+ (read-tokens)))
+
+(test-equal "a"
+ '("a")
+ (begin
+ (read-this "a")
+ (read-tokens)))
+
+(test-equal "a_2"
+ '("a_2")
+ (begin
+ (read-this "a_2")
+ (read-tokens)))
+
+(test-error "1foo"
+ (begin
+ (read-this "1foo")
+ (read-tokens)))
+
+(test-equal "int main(void) {return 2;}"
+ '("int" "main" left-paren "void" right-paren open-brace "return" 2 semi-colon close-brace)
+ (begin
+ (read-this "int main(void) {return 2;}")
+ (read-tokens)))
+
+
+(test-end "lexer-harness")