summaryrefslogtreecommitdiff
path: root/huffman-coding/huffman.scm
diff options
context:
space:
mode:
Diffstat (limited to 'huffman-coding/huffman.scm')
-rw-r--r--huffman-coding/huffman.scm124
1 files changed, 124 insertions, 0 deletions
diff --git a/huffman-coding/huffman.scm b/huffman-coding/huffman.scm
new file mode 100644
index 0000000..392ee4c
--- /dev/null
+++ b/huffman-coding/huffman.scm
@@ -0,0 +1,124 @@
+(define-module (huffman)
+ #:use-module (srfi srfi-9)
+ #:use-module (pq) ;; my priority queue implementation
+ #:export (make-node
+ node?
+ node-char
+ node-left
+ node-right
+ decode-text
+ encode-text
+ create-encodings
+ combine-all!
+ combine-and-reinsert!
+ frequencies->pq
+ hash-frequency
+ sort-rao))
+
+
+(define-record-type <node>
+ (make-node char left right)
+ node?
+ (char node-char)
+ (left node-left set-node-left!)
+ (right node-right set-node-right!))
+
+(define (decode-text str keys)
+ "Does the opposite of encode-text."
+ (let ((patterns (map (lambda (pair) (cons (cdr pair) (car pair)))
+ keys)))
+ (let loop ((lst (string->list str)) (cur '()) (res '()))
+ (let ((decoded-char (assoc-ref patterns (list->string (reverse cur)))))
+ (cond
+ ((null? lst)
+ (if decoded-char
+ (list->string (reverse (cons decoded-char res)))
+ (throw 'mismatched-encoding-key str keys)))
+ (decoded-char (loop (cdr lst) (cons (car lst) '()) (cons decoded-char res)))
+ (#t (loop (cdr lst) (cons (car lst) cur) res)))))))
+
+(define (encode-text str)
+ "Given a STR, returns the encoded string,
+alongside the used encoding scheme."
+ (let ((hash-pq (frequencies->pq (hash-frequency str))))
+ (if (equal? (pq-length hash-pq) 0)
+ (cons "" '())
+ (let ((patterns (create-encodings
+ (car (pq-front
+ (combine-all! hash-pq))))))
+ (let loop ((lst (string->list str))
+ (res ""))
+ (if (null? lst)
+ (cons res patterns)
+ (loop (cdr lst)
+ (string-append res (assoc-ref patterns (car lst))))))))))
+
+(define (create-encodings root)
+ "Given node ROOT, traverses the binary tree,
+constructing bit patterns for each letter."
+ (let loop ((str "")
+ (tree root))
+ (if (node-char tree)
+ (acons (node-char tree) (if (string-null? str) "0" str) '())
+ (append (loop (string-append str "0") (node-left tree))
+ (loop (string-append str "1") (node-right tree))))))
+
+(define (combine-all! pq)
+ "Given priority PQ, builds an encoding tree
+by recursively combining high-priority nodes."
+ (if (>= 1 (pq-length pq))
+ pq
+ (begin (combine-and-reinsert! pq)
+ (combine-all! pq))))
+
+(define (combine-and-reinsert! pq)
+ "Given priority queue PQ, retrieves the two
+largest-priority elements, combines, and reinserts
+them."
+ (let* ((e1 (pq-pop! pq))
+ (e2 (pq-pop! pq))
+ (c1 (car e1))
+ (c2 (car e2))
+ (p1 (make-node #f c1 c2)))
+ (pq-push! pq p1 (+ (cdr e1) (cdr e2)))))
+
+(define (frequencies->pq ht)
+ "Given hash table HT of characters
+corresponding to frequencies, returns
+a frequency and alphabetically sorted
+priority queue of nodes.
+
+Negates the frequency, because we want
+to pull the smallest numbers first."
+ (let loop ((els (sort-rao (hash-map->list cons ht)))
+ (pq (make-pq)))
+ (if (null? els)
+ pq
+ (begin
+ (pq-push! pq (make-node (caar els) #f #f) (- (cdar els)))
+ (loop (cdr els) pq)))))
+
+(define (hash-frequency str)
+ "Given STR, returns a hash table of
+each character, corresponding to the number
+of times it appears in STR."
+ (let ((dict (make-hash-table)))
+ (let loop ((chrs (string->list str)))
+ (if (null? chrs)
+ dict
+ (begin (hashq-set! dict (car chrs) (1+ (dict-occur-ref dict (car chrs))))
+ (loop (cdr chrs)))))))
+
+(define (dict-occur-ref dict e)
+ "A wrapper for hashq-ref. Returns '0' if an element
+is not present, rather than '#f'."
+ (or (hashq-ref dict e) 0))
+
+(define (sort-rao lst)
+ "Helper for frequencies->pq.
+
+Given an alist, sorts it in
+reverse alphabetical order."
+ (sort lst
+ (lambda (a b)
+ (char>? (car a) (car b)))))