diff options
Diffstat (limited to 'huffman-coding/huffman.scm')
-rw-r--r-- | huffman-coding/huffman.scm | 124 |
1 files changed, 124 insertions, 0 deletions
diff --git a/huffman-coding/huffman.scm b/huffman-coding/huffman.scm new file mode 100644 index 0000000..392ee4c --- /dev/null +++ b/huffman-coding/huffman.scm @@ -0,0 +1,124 @@ +(define-module (huffman) + #:use-module (srfi srfi-9) + #:use-module (pq) ;; my priority queue implementation + #:export (make-node + node? + node-char + node-left + node-right + decode-text + encode-text + create-encodings + combine-all! + combine-and-reinsert! + frequencies->pq + hash-frequency + sort-rao)) + + +(define-record-type <node> + (make-node char left right) + node? + (char node-char) + (left node-left set-node-left!) + (right node-right set-node-right!)) + +(define (decode-text str keys) + "Does the opposite of encode-text." + (let ((patterns (map (lambda (pair) (cons (cdr pair) (car pair))) + keys))) + (let loop ((lst (string->list str)) (cur '()) (res '())) + (let ((decoded-char (assoc-ref patterns (list->string (reverse cur))))) + (cond + ((null? lst) + (if decoded-char + (list->string (reverse (cons decoded-char res))) + (throw 'mismatched-encoding-key str keys))) + (decoded-char (loop (cdr lst) (cons (car lst) '()) (cons decoded-char res))) + (#t (loop (cdr lst) (cons (car lst) cur) res))))))) + +(define (encode-text str) + "Given a STR, returns the encoded string, +alongside the used encoding scheme." + (let ((hash-pq (frequencies->pq (hash-frequency str)))) + (if (equal? (pq-length hash-pq) 0) + (cons "" '()) + (let ((patterns (create-encodings + (car (pq-front + (combine-all! hash-pq)))))) + (let loop ((lst (string->list str)) + (res "")) + (if (null? lst) + (cons res patterns) + (loop (cdr lst) + (string-append res (assoc-ref patterns (car lst)))))))))) + +(define (create-encodings root) + "Given node ROOT, traverses the binary tree, +constructing bit patterns for each letter." + (let loop ((str "") + (tree root)) + (if (node-char tree) + (acons (node-char tree) (if (string-null? str) "0" str) '()) + (append (loop (string-append str "0") (node-left tree)) + (loop (string-append str "1") (node-right tree)))))) + +(define (combine-all! pq) + "Given priority PQ, builds an encoding tree +by recursively combining high-priority nodes." + (if (>= 1 (pq-length pq)) + pq + (begin (combine-and-reinsert! pq) + (combine-all! pq)))) + +(define (combine-and-reinsert! pq) + "Given priority queue PQ, retrieves the two +largest-priority elements, combines, and reinserts +them." + (let* ((e1 (pq-pop! pq)) + (e2 (pq-pop! pq)) + (c1 (car e1)) + (c2 (car e2)) + (p1 (make-node #f c1 c2))) + (pq-push! pq p1 (+ (cdr e1) (cdr e2))))) + +(define (frequencies->pq ht) + "Given hash table HT of characters +corresponding to frequencies, returns +a frequency and alphabetically sorted +priority queue of nodes. + +Negates the frequency, because we want +to pull the smallest numbers first." + (let loop ((els (sort-rao (hash-map->list cons ht))) + (pq (make-pq))) + (if (null? els) + pq + (begin + (pq-push! pq (make-node (caar els) #f #f) (- (cdar els))) + (loop (cdr els) pq))))) + +(define (hash-frequency str) + "Given STR, returns a hash table of +each character, corresponding to the number +of times it appears in STR." + (let ((dict (make-hash-table))) + (let loop ((chrs (string->list str))) + (if (null? chrs) + dict + (begin (hashq-set! dict (car chrs) (1+ (dict-occur-ref dict (car chrs)))) + (loop (cdr chrs))))))) + +(define (dict-occur-ref dict e) + "A wrapper for hashq-ref. Returns '0' if an element +is not present, rather than '#f'." + (or (hashq-ref dict e) 0)) + +(define (sort-rao lst) + "Helper for frequencies->pq. + +Given an alist, sorts it in +reverse alphabetical order." + (sort lst + (lambda (a b) + (char>? (car a) (car b))))) |