summaryrefslogtreecommitdiff
path: root/huffman-coding/huffman.scm
blob: 392ee4ca6a8cf006bf4883feeab9d7d9948f8c56 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
(define-module (huffman)
  #:use-module (srfi srfi-9)
  #:use-module (pq) ;; my priority queue implementation
  #:export (make-node
            node?
            node-char
            node-left
            node-right
            decode-text
            encode-text
            create-encodings
            combine-all!
            combine-and-reinsert!
            frequencies->pq
            hash-frequency
            sort-rao))


(define-record-type <node>
  (make-node char left right)
  node?
  (char   node-char)
  (left   node-left   set-node-left!)
  (right  node-right  set-node-right!))

(define (decode-text str keys)
  "Does the opposite of encode-text."
  (let ((patterns (map (lambda (pair) (cons (cdr pair) (car pair)))
                       keys)))
    (let loop ((lst (string->list str)) (cur '()) (res '()))
      (let ((decoded-char (assoc-ref patterns (list->string (reverse cur)))))
        (cond
         ((null? lst)
          (if decoded-char
              (list->string (reverse (cons decoded-char res)))
              (throw 'mismatched-encoding-key str keys)))
         (decoded-char (loop (cdr lst) (cons (car lst) '()) (cons decoded-char res)))
         (#t (loop (cdr lst) (cons (car lst) cur) res)))))))

(define (encode-text str)
  "Given a STR, returns the encoded string,
alongside the used encoding scheme."
  (let ((hash-pq (frequencies->pq (hash-frequency str))))
    (if (equal? (pq-length hash-pq) 0)
        (cons "" '())
        (let ((patterns (create-encodings
                         (car (pq-front
                               (combine-all! hash-pq))))))
          (let loop ((lst (string->list str))
                     (res ""))
            (if (null? lst)
                (cons res patterns)
                (loop (cdr lst)
                      (string-append res (assoc-ref patterns (car lst))))))))))

(define (create-encodings root)
  "Given node ROOT, traverses the binary tree,
constructing bit patterns for each letter."
  (let loop ((str "")
             (tree root))
    (if (node-char tree)
        (acons (node-char tree) (if (string-null? str) "0" str) '())
        (append (loop (string-append str "0") (node-left tree))
                (loop (string-append str "1") (node-right tree))))))

(define (combine-all! pq)
  "Given priority PQ, builds an encoding tree
by recursively combining high-priority nodes."
  (if (>= 1 (pq-length pq))
      pq
      (begin (combine-and-reinsert! pq)
             (combine-all! pq))))

(define (combine-and-reinsert! pq)
  "Given priority queue PQ, retrieves the two
largest-priority elements, combines, and reinserts
them."
  (let* ((e1 (pq-pop! pq))
         (e2 (pq-pop! pq))
         (c1 (car e1))
         (c2 (car e2))
         (p1 (make-node #f c1 c2)))
    (pq-push! pq p1 (+ (cdr e1) (cdr e2)))))

(define (frequencies->pq ht)
  "Given hash table HT of characters
corresponding to frequencies, returns
a frequency and alphabetically sorted
priority queue of nodes.

Negates the frequency, because we want
to pull the smallest numbers first."
  (let loop ((els (sort-rao (hash-map->list cons ht)))
             (pq (make-pq)))
    (if (null? els)
        pq
        (begin
          (pq-push! pq (make-node (caar els) #f #f) (- (cdar els)))
          (loop (cdr els) pq)))))

(define (hash-frequency str)
  "Given STR, returns a hash table of
each character, corresponding to the number
of times it appears in STR."
  (let ((dict (make-hash-table)))
    (let loop ((chrs (string->list str)))
      (if (null? chrs)
          dict
          (begin (hashq-set! dict (car chrs) (1+ (dict-occur-ref dict (car chrs))))
                 (loop (cdr chrs)))))))

(define (dict-occur-ref dict e)
  "A wrapper for hashq-ref. Returns '0' if an element
is not present, rather than '#f'."
  (or (hashq-ref dict e) 0))

(define (sort-rao lst)
  "Helper for frequencies->pq.

Given an alist, sorts it in
reverse alphabetical order."
  (sort lst
        (lambda (a b)
          (char>? (car a) (car b)))))