1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
(define-module (huffman)
#:use-module (srfi srfi-9)
#:use-module (pq) ;; my priority queue implementation
#:export (make-node
node?
node-char
node-left
node-right
decode-text
encode-text
create-encodings
combine-all!
combine-and-reinsert!
frequencies->pq
hash-frequency
sort-rao))
(define-record-type <node>
(make-node char left right)
node?
(char node-char)
(left node-left set-node-left!)
(right node-right set-node-right!))
(define (decode-text str keys)
"Does the opposite of encode-text."
(let ((patterns (map (lambda (pair) (cons (cdr pair) (car pair)))
keys)))
(let loop ((lst (string->list str)) (cur '()) (res '()))
(let ((decoded-char (assoc-ref patterns (list->string (reverse cur)))))
(cond
((null? lst)
(if decoded-char
(list->string (reverse (cons decoded-char res)))
(throw 'mismatched-encoding-key str keys)))
(decoded-char (loop (cdr lst) (cons (car lst) '()) (cons decoded-char res)))
(#t (loop (cdr lst) (cons (car lst) cur) res)))))))
(define (encode-text str)
"Given a STR, returns the encoded string,
alongside the used encoding scheme."
(let ((hash-pq (frequencies->pq (hash-frequency str))))
(if (equal? (pq-length hash-pq) 0)
(cons "" '())
(let ((patterns (create-encodings
(car (pq-front
(combine-all! hash-pq))))))
(let loop ((lst (string->list str))
(res ""))
(if (null? lst)
(cons res patterns)
(loop (cdr lst)
(string-append res (assoc-ref patterns (car lst))))))))))
(define (create-encodings root)
"Given node ROOT, traverses the binary tree,
constructing bit patterns for each letter."
(let loop ((str "")
(tree root))
(if (node-char tree)
(acons (node-char tree) (if (string-null? str) "0" str) '())
(append (loop (string-append str "0") (node-left tree))
(loop (string-append str "1") (node-right tree))))))
(define (combine-all! pq)
"Given priority PQ, builds an encoding tree
by recursively combining high-priority nodes."
(if (>= 1 (pq-length pq))
pq
(begin (combine-and-reinsert! pq)
(combine-all! pq))))
(define (combine-and-reinsert! pq)
"Given priority queue PQ, retrieves the two
largest-priority elements, combines, and reinserts
them."
(let* ((e1 (pq-pop! pq))
(e2 (pq-pop! pq))
(c1 (car e1))
(c2 (car e2))
(p1 (make-node #f c1 c2)))
(pq-push! pq p1 (+ (cdr e1) (cdr e2)))))
(define (frequencies->pq ht)
"Given hash table HT of characters
corresponding to frequencies, returns
a frequency and alphabetically sorted
priority queue of nodes.
Negates the frequency, because we want
to pull the smallest numbers first."
(let loop ((els (sort-rao (hash-map->list cons ht)))
(pq (make-pq)))
(if (null? els)
pq
(begin
(pq-push! pq (make-node (caar els) #f #f) (- (cdar els)))
(loop (cdr els) pq)))))
(define (hash-frequency str)
"Given STR, returns a hash table of
each character, corresponding to the number
of times it appears in STR."
(let ((dict (make-hash-table)))
(let loop ((chrs (string->list str)))
(if (null? chrs)
dict
(begin (hashq-set! dict (car chrs) (1+ (dict-occur-ref dict (car chrs))))
(loop (cdr chrs)))))))
(define (dict-occur-ref dict e)
"A wrapper for hashq-ref. Returns '0' if an element
is not present, rather than '#f'."
(or (hashq-ref dict e) 0))
(define (sort-rao lst)
"Helper for frequencies->pq.
Given an alist, sorts it in
reverse alphabetical order."
(sort lst
(lambda (a b)
(char>? (car a) (car b)))))
|