diff options
author | bd <bdunaisky@umass.edu> | 2024-11-04 22:28:56 -0500 |
---|---|---|
committer | bd <bdunaisky@umass.edu> | 2024-11-04 22:28:56 -0500 |
commit | 4717b3ea91b47c3d04d09cf84b9221ea1f89d9f2 (patch) | |
tree | 8e5c3d6a83609cd32dbf663b409485697196f7a5 /src | |
parent | bad998157c95851eb29d3e4f6824a276d7032b12 (diff) |
Basic lexer with unit tests
Diffstat (limited to 'src')
-rwxr-xr-x | src/gscc | 3 | ||||
-rw-r--r-- | src/modules/lexer/lexer.scm | 78 | ||||
-rw-r--r-- | src/unit-tests/lexer/lexer-test.scm | 103 |
3 files changed, 158 insertions, 26 deletions
@@ -34,7 +34,8 @@ file on a success." (define (process file parse? assemble?) "Driver for lexing, parsing, and assembly generation." (let ((port (open-input-file file))) - (read-tokens-from-port port) + (set-current-input-port port) + (read-tokens) (close-input-port port))) (define (postprocess file dest) diff --git a/src/modules/lexer/lexer.scm b/src/modules/lexer/lexer.scm index 0661fa6..e07ab67 100644 --- a/src/modules/lexer/lexer.scm +++ b/src/modules/lexer/lexer.scm @@ -1,31 +1,59 @@ (define-module (modules lexer lexer) - #:export (read-tokens-from-port)) + #:export (read-tokens)) -(define (read-tokens-from-port port) - "" - (display (read-char port))) +(define (read-tokens) + "Returns a stream of tokens from the +current input port." + (define (read-tokens-loop tokens-so-far) + (let ((token (read-token))) + (if token + (read-tokens-loop (cons token tokens-so-far)) + (reverse tokens-so-far)))) + (read-tokens-loop '())) (define (read-token) - - (define (read-token-loop first-char) + (let ((chr (read-char))) (cond + ((eof-object? chr) #f) + ((char-whitespace? chr) + (read-token)) + + ((eq? chr #\() + 'left-paren) + ((eq? chr #\)) + 'right-paren) + ((eq? chr #\{) + 'open-brace) + ((eq? chr #\}) + 'close-brace) + ((eq? chr #\;) + 'semi-colon) + + ((char-alphabetic? chr) + (read-identifier chr)) + + ((char-numeric? chr) + (read-constant chr)) + + (#t (error "illegal lexical syntax"))))) + +(define (read-constant chr) + (define (read-constant-helper chrs-so-far) + (let ((chr (peek-char))) + (cond ((and (not (eof-object? chr)) (char-numeric? chr)) + (read-constant-helper (cons (read-char) chrs-so-far))) + ((and (not (eof-object? chr)) (char-alphabetic? chr)) + (error "identifier starting with digit")) + (#t (reverse chrs-so-far))))) + (string->number (list->string (read-constant-helper (list chr))))) - ((or (char-whitespace? first-char)) - (read-token) - - ((eq? first-char #\() - (cons 'left-paren #\()) - ((eq? first-char #\)) - (cons 'right-paren #\))) - ((eq? first-char #\{) - (cons 'right-bracket #\{)) - ((eq? first-char #\}) - (cons 'right-bracket #\})) - - (#t - (error "illegal lexical syntax"))))) - - (let ((first-char (read-char))) - (if (eof-object? first-char) - '() - (cons (read-token-loop first-char) '())))) +(define (read-identifier chr) + (define (read-identifier-helper chrs-so-far) + (let ((chr (peek-char))) + (cond ((and (not (eof-object? chr)) + (or (char-alphabetic? chr) + (char-numeric? chr) + (eq? chr #\_))) + (read-identifier-helper (cons (read-char) chrs-so-far))) + (#t (reverse chrs-so-far))))) + (list->string (read-identifier-helper (list chr)))) diff --git a/src/unit-tests/lexer/lexer-test.scm b/src/unit-tests/lexer/lexer-test.scm new file mode 100644 index 0000000..6596a1f --- /dev/null +++ b/src/unit-tests/lexer/lexer-test.scm @@ -0,0 +1,103 @@ +;; -*- compile-command: "guile -L ./src ./src/unit-tests/lexer/lexer-test.scm"; -*- +(use-modules (srfi srfi-64) + (modules lexer lexer)) + +(define (read-this str) + (set-current-input-port + (open-input-string str))) + + +(test-begin "lexer-harness") + + +(test-equal "empty string" + '() + (begin + (read-this "") + (read-tokens))) + +(test-equal "(" + '(left-paren) + (begin + (read-this "(") + (read-tokens))) + +(test-equal "((" + '(left-paren left-paren) + (begin + (read-this "((") + (read-tokens))) + +(test-equal "( )" + '(left-paren right-paren) + (begin + (read-this "( )") + (read-tokens))) + +(test-equal "( {;} {((};})" + '(left-paren open-brace semi-colon close-brace open-brace left-paren left-paren close-brace semi-colon close-brace right-paren) + (begin + (read-this "( {;} {((};})") + (read-tokens))) + +(test-equal "1" + '(1) + (begin + (read-this "1") + (read-tokens))) + +(test-equal "0" + '(0) + (begin + (read-this "0") + (read-tokens))) + +(test-equal "0011001" + '(11001) + (begin + (read-this "0011001") + (read-tokens))) + +(test-equal "12 {34" + '(12 open-brace 34) + (begin + (read-this "12 {34") + (read-tokens))) + +(test-equal "34;" + '(34 semi-colon) + (begin + (read-this "34;") + (read-tokens))) + + +(test-error "3.4" + (begin + (read-this "3.4") + (read-tokens))) + +(test-equal "a" + '("a") + (begin + (read-this "a") + (read-tokens))) + +(test-equal "a_2" + '("a_2") + (begin + (read-this "a_2") + (read-tokens))) + +(test-error "1foo" + (begin + (read-this "1foo") + (read-tokens))) + +(test-equal "int main(void) {return 2;}" + '("int" "main" left-paren "void" right-paren open-brace "return" 2 semi-colon close-brace) + (begin + (read-this "int main(void) {return 2;}") + (read-tokens))) + + +(test-end "lexer-harness") |