summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--huffman-coding/huffman-test.scm179
-rw-r--r--huffman-coding/huffman.scm124
l---------huffman-coding/pq.scm1
-rw-r--r--priority-queue/pq.scm1
4 files changed, 304 insertions, 1 deletions
diff --git a/huffman-coding/huffman-test.scm b/huffman-coding/huffman-test.scm
new file mode 100644
index 0000000..b30c4ca
--- /dev/null
+++ b/huffman-coding/huffman-test.scm
@@ -0,0 +1,179 @@
+;; -*- compile-command: "guile -L . huffman-test.scm"; -*-
+(use-modules (srfi srfi-64)
+ (pq)
+ (huffman))
+
+(define runner "a fast runner need never be afraid of the dark")
+(define runner-opt-key '((#\v . "111001") (#\u . "111000") (#\t . "0100") (#\s . "111011") (#\r . "100") (#\o . "111010") (#\n . "1111") (#\k . "101101") (#\i . "101100") (#\h . "101111") (#\f . "1010") (#\e . "110") (#\d . "0101") (#\b . "101110") (#\a . "011") (#\space . "00")))
+(define abcde "A_DEAD_DAD_CEDED_A_BAD_BABE_A_BEADED_ABACA_BED")
+(define abcde-opt-key '((#\_ . "01") (#\E . "110") (#\D . "00") (#\C . "1110") (#\B . "1111") (#\A . "10")))
+(define abcde-encoded "1001001101000010010000111101100011000011001111110000111111011111100110011111110100011000011011111011101001111111000")
+(define (hash-table->alist ht)
+ (hash-map->list cons ht))
+
+(define (node-pq->char-alist pq)
+ (map (lambda (ele)
+ (cons (node-char (car ele))
+ (cdr ele)))
+ (car pq)))
+
+(define (extract-tree pq)
+ (car (pq-front pq)))
+
+
+(test-begin "harness")
+
+(test-equal "hash-frequency zero chars, empty hash"
+ '()
+ (sort-rao (hash-table->alist (hash-frequency ""))))
+
+(test-equal "hash-frequency single 'c' key pair correct"
+ '((#\c . 1))
+ (sort-rao (hash-table->alist (hash-frequency "c"))))
+
+(test-equal "hash-frequency double 'c' key pair correct"
+ '((#\c . 2))
+ (sort-rao (hash-table->alist (hash-frequency "cc"))))
+
+(test-equal "hash-frequency couple letters key pair correct"
+ '((#\c . 2) (#\Y . 1))
+ (sort-rao (hash-table->alist (hash-frequency "cYc"))))
+
+(test-equal "hash-frequency couple letters key pair correct"
+ '((#\c . 2) (#\Y . 1))
+ (sort-rao (hash-table->alist (hash-frequency "cYc"))))
+
+(test-equal "hash-frequency abcde"
+ '((#\_ . 10) (#\E . 7) (#\D . 10) (#\C . 2) (#\B . 6) (#\A . 11))
+ (sort-rao (hash-table->alist (hash-frequency abcde))))
+
+(test-assert "frequencies->pq accepts hashtable and returns a pq"
+ (pq? (frequencies->pq (make-hash-table))))
+
+(test-equal "frequencies->pq single entry"
+ '((#\a . -3))
+ (node-pq->char-alist (frequencies->pq (hash-frequency "aaa"))))
+
+(test-equal "frequencies->pq double entry"
+ '((#\a . -3) (#\b . -3))
+ (node-pq->char-alist (frequencies->pq (hash-frequency "aaabbb"))))
+
+(test-equal "frequencies->pq tie natural order"
+ '((#\a . -3) (#\b . -3) (#\c . -4))
+ (node-pq->char-alist (frequencies->pq (hash-frequency "cabababccc"))))
+
+(test-equal "frequencies->pq tie reverse order"
+ '((#\a . -3) (#\b . -3) (#\c . -4))
+ (node-pq->char-alist (frequencies->pq (hash-frequency "cbababaccc"))))
+
+(test-equal "frequencies->pq abcde"
+ '((#\C . -2) (#\B . -6) (#\E . -7) (#\D . -10) (#\_ . -10) (#\A . -11))
+ (node-pq->char-alist (frequencies->pq (hash-frequency abcde))))
+
+(let* ((pq (frequencies->pq (hash-frequency "aab")))
+ (pqo (frequencies->pq (hash-frequency "aab")))
+ (lc (car (pq-front pq)))
+ (rc (car (pq-rear pq)))
+ (p (begin (combine-and-reinsert! pq)
+ (car (pq-front pq)))))
+
+ (test-assert "pq and pqo now different objects, but both pqs"
+ (and (not (equal? pq pqo))
+ (pq? pq)
+ (pq? pqo)))
+
+ (test-assert "combine-and-reinsert! left child is correct"
+ (equal? (node-left p) lc))
+
+ (test-assert "combine-and-reinsert! right child is correct"
+ (equal? (node-right p) rc))
+
+ (test-equal "combine-and-reinsert! correctly sums priority"
+ -3
+ (cdr (pq-front pq))))
+
+(let* ((pq (frequencies->pq (hash-frequency "aaabbbcccc"))))
+ (combine-and-reinsert! pq)
+
+ (test-equal "combine-and-reinsert! first elements combined"
+ #\c
+ (node-char (car (pq-front pq))))
+
+ (test-equal "combine-and-reinsert! new node inserted in correct spot"
+ -6
+ (cdr (pq-rear pq))))
+
+(let ((pq (combine-all! (frequencies->pq (hash-frequency runner)))))
+
+ (test-assert "combine-all! returns a pq"
+ (pq? pq))
+
+ (test-equal "combine-all! pq is of length one"
+ 1
+ (pq-length pq))
+
+ (test-assert "combine-all! root item is a node"
+ (node? (car (pq-front pq))))
+
+ (test-equal "combine-all! root frequency matches length of str"
+ -46
+ (cdr (pq-front pq))))
+
+(test-equal "create-encodings level-1 tree"
+ '((#\a . "0"))
+ (sort-rao (create-encodings (extract-tree
+ (combine-all!
+ (frequencies->pq
+ (hash-frequency "a")))))))
+
+;;;; note: verified manually: https://en.wikipedia.org/wiki/Huffman_coding
+(test-equal "create-encodings abcde"
+ abcde-opt-key
+ (sort-rao (create-encodings (extract-tree
+ (combine-all!
+ (frequencies->pq
+ (hash-frequency abcde)))))))
+
+(test-equal "create-encodings runner"
+ runner-opt-key
+ (sort-rao (create-encodings (extract-tree
+ (combine-all!
+ (frequencies->pq
+ (hash-frequency runner)))))))
+
+(let ((none (encode-text "")))
+
+ (test-assert "encode-text nothing empty string"
+ (string-null? (car none)))
+
+ (test-assert "encode-text nothing empty alist"
+ (null? (cdr none))))
+
+(let ((limit (encode-text abcde)))
+
+ (test-equal "encode-text abcde string"
+ abcde-encoded
+ (car limit))
+
+ (test-equal "encode-text abcde alist"
+ abcde-opt-key
+ (sort-rao (cdr limit))))
+
+(test-equal "decode single char"
+ "x"
+ (decode-text "00011" '((#\x . "00011") (#\y . "001"))))
+
+(test-equal "decode two chars"
+ "xq"
+ (decode-text "00011" '((#\x . "000") (#\y . "011") (#\q . "11"))))
+
+(test-equal "decode abcde"
+ abcde
+ (decode-text abcde-encoded abcde-opt-key))
+
+(test-error "decode abcde nonsense key"
+ #t
+ (decode-text abcde-encoded runner-opt-key))
+
+
+(test-end "harness")
diff --git a/huffman-coding/huffman.scm b/huffman-coding/huffman.scm
new file mode 100644
index 0000000..392ee4c
--- /dev/null
+++ b/huffman-coding/huffman.scm
@@ -0,0 +1,124 @@
+(define-module (huffman)
+ #:use-module (srfi srfi-9)
+ #:use-module (pq) ;; my priority queue implementation
+ #:export (make-node
+ node?
+ node-char
+ node-left
+ node-right
+ decode-text
+ encode-text
+ create-encodings
+ combine-all!
+ combine-and-reinsert!
+ frequencies->pq
+ hash-frequency
+ sort-rao))
+
+
+(define-record-type <node>
+ (make-node char left right)
+ node?
+ (char node-char)
+ (left node-left set-node-left!)
+ (right node-right set-node-right!))
+
+(define (decode-text str keys)
+ "Does the opposite of encode-text."
+ (let ((patterns (map (lambda (pair) (cons (cdr pair) (car pair)))
+ keys)))
+ (let loop ((lst (string->list str)) (cur '()) (res '()))
+ (let ((decoded-char (assoc-ref patterns (list->string (reverse cur)))))
+ (cond
+ ((null? lst)
+ (if decoded-char
+ (list->string (reverse (cons decoded-char res)))
+ (throw 'mismatched-encoding-key str keys)))
+ (decoded-char (loop (cdr lst) (cons (car lst) '()) (cons decoded-char res)))
+ (#t (loop (cdr lst) (cons (car lst) cur) res)))))))
+
+(define (encode-text str)
+ "Given a STR, returns the encoded string,
+alongside the used encoding scheme."
+ (let ((hash-pq (frequencies->pq (hash-frequency str))))
+ (if (equal? (pq-length hash-pq) 0)
+ (cons "" '())
+ (let ((patterns (create-encodings
+ (car (pq-front
+ (combine-all! hash-pq))))))
+ (let loop ((lst (string->list str))
+ (res ""))
+ (if (null? lst)
+ (cons res patterns)
+ (loop (cdr lst)
+ (string-append res (assoc-ref patterns (car lst))))))))))
+
+(define (create-encodings root)
+ "Given node ROOT, traverses the binary tree,
+constructing bit patterns for each letter."
+ (let loop ((str "")
+ (tree root))
+ (if (node-char tree)
+ (acons (node-char tree) (if (string-null? str) "0" str) '())
+ (append (loop (string-append str "0") (node-left tree))
+ (loop (string-append str "1") (node-right tree))))))
+
+(define (combine-all! pq)
+ "Given priority PQ, builds an encoding tree
+by recursively combining high-priority nodes."
+ (if (>= 1 (pq-length pq))
+ pq
+ (begin (combine-and-reinsert! pq)
+ (combine-all! pq))))
+
+(define (combine-and-reinsert! pq)
+ "Given priority queue PQ, retrieves the two
+largest-priority elements, combines, and reinserts
+them."
+ (let* ((e1 (pq-pop! pq))
+ (e2 (pq-pop! pq))
+ (c1 (car e1))
+ (c2 (car e2))
+ (p1 (make-node #f c1 c2)))
+ (pq-push! pq p1 (+ (cdr e1) (cdr e2)))))
+
+(define (frequencies->pq ht)
+ "Given hash table HT of characters
+corresponding to frequencies, returns
+a frequency and alphabetically sorted
+priority queue of nodes.
+
+Negates the frequency, because we want
+to pull the smallest numbers first."
+ (let loop ((els (sort-rao (hash-map->list cons ht)))
+ (pq (make-pq)))
+ (if (null? els)
+ pq
+ (begin
+ (pq-push! pq (make-node (caar els) #f #f) (- (cdar els)))
+ (loop (cdr els) pq)))))
+
+(define (hash-frequency str)
+ "Given STR, returns a hash table of
+each character, corresponding to the number
+of times it appears in STR."
+ (let ((dict (make-hash-table)))
+ (let loop ((chrs (string->list str)))
+ (if (null? chrs)
+ dict
+ (begin (hashq-set! dict (car chrs) (1+ (dict-occur-ref dict (car chrs))))
+ (loop (cdr chrs)))))))
+
+(define (dict-occur-ref dict e)
+ "A wrapper for hashq-ref. Returns '0' if an element
+is not present, rather than '#f'."
+ (or (hashq-ref dict e) 0))
+
+(define (sort-rao lst)
+ "Helper for frequencies->pq.
+
+Given an alist, sorts it in
+reverse alphabetical order."
+ (sort lst
+ (lambda (a b)
+ (char>? (car a) (car b)))))
diff --git a/huffman-coding/pq.scm b/huffman-coding/pq.scm
new file mode 120000
index 0000000..11bf900
--- /dev/null
+++ b/huffman-coding/pq.scm
@@ -0,0 +1 @@
+../priority-queue/pq.scm \ No newline at end of file
diff --git a/priority-queue/pq.scm b/priority-queue/pq.scm
index 337e5a6..a3a86d4 100644
--- a/priority-queue/pq.scm
+++ b/priority-queue/pq.scm
@@ -1,6 +1,5 @@
(define-module (pq)
#:use-module (srfi srfi-1)
- #:use-module (srfi srfi-9)
#:use-module (ice-9 q)
#:export (make-pq
pq?