(define-module (huffman) #:use-module (srfi srfi-9) #:use-module (pq) ;; my priority queue implementation #:export (make-node node? node-char node-left node-right decode-text encode-text create-encodings combine-all! combine-and-reinsert! frequencies->pq hash-frequency sort-rao)) (define-record-type (make-node char left right) node? (char node-char) (left node-left set-node-left!) (right node-right set-node-right!)) (define (decode-text str keys) "Does the opposite of encode-text." (let ((patterns (map (lambda (pair) (cons (cdr pair) (car pair))) keys))) (let loop ((lst (string->list str)) (cur '()) (res '())) (let ((decoded-char (assoc-ref patterns (list->string (reverse cur))))) (cond ((null? lst) (if decoded-char (list->string (reverse (cons decoded-char res))) (throw 'mismatched-encoding-key str keys))) (decoded-char (loop (cdr lst) (cons (car lst) '()) (cons decoded-char res))) (#t (loop (cdr lst) (cons (car lst) cur) res))))))) (define (encode-text str) "Given a STR, returns the encoded string, alongside the used encoding scheme." (let ((hash-pq (frequencies->pq (hash-frequency str)))) (if (equal? (pq-length hash-pq) 0) (cons "" '()) (let ((patterns (create-encodings (car (pq-front (combine-all! hash-pq)))))) (let loop ((lst (string->list str)) (res "")) (if (null? lst) (cons res patterns) (loop (cdr lst) (string-append res (assoc-ref patterns (car lst)))))))))) (define (create-encodings root) "Given node ROOT, traverses the binary tree, constructing bit patterns for each letter." (let loop ((str "") (tree root)) (if (node-char tree) (acons (node-char tree) (if (string-null? str) "0" str) '()) (append (loop (string-append str "0") (node-left tree)) (loop (string-append str "1") (node-right tree)))))) (define (combine-all! pq) "Given priority PQ, builds an encoding tree by recursively combining high-priority nodes." (if (>= 1 (pq-length pq)) pq (begin (combine-and-reinsert! pq) (combine-all! pq)))) (define (combine-and-reinsert! pq) "Given priority queue PQ, retrieves the two largest-priority elements, combines, and reinserts them." (let* ((e1 (pq-pop! pq)) (e2 (pq-pop! pq)) (c1 (car e1)) (c2 (car e2)) (p1 (make-node #f c1 c2))) (pq-push! pq p1 (+ (cdr e1) (cdr e2))))) (define (frequencies->pq ht) "Given hash table HT of characters corresponding to frequencies, returns a frequency and alphabetically sorted priority queue of nodes. Negates the frequency, because we want to pull the smallest numbers first." (let loop ((els (sort-rao (hash-map->list cons ht))) (pq (make-pq))) (if (null? els) pq (begin (pq-push! pq (make-node (caar els) #f #f) (- (cdar els))) (loop (cdr els) pq))))) (define (hash-frequency str) "Given STR, returns a hash table of each character, corresponding to the number of times it appears in STR." (let ((dict (make-hash-table))) (let loop ((chrs (string->list str))) (if (null? chrs) dict (begin (hashq-set! dict (car chrs) (1+ (dict-occur-ref dict (car chrs)))) (loop (cdr chrs))))))) (define (dict-occur-ref dict e) "A wrapper for hashq-ref. Returns '0' if an element is not present, rather than '#f'." (or (hashq-ref dict e) 0)) (define (sort-rao lst) "Helper for frequencies->pq. Given an alist, sorts it in reverse alphabetical order." (sort lst (lambda (a b) (char>? (car a) (car b)))))