;;;; Data Compressor
(declare (usual-integrations))
-
-;; This declaration is worth up to 30% speedup
-(declare
- (ignore-reference-traps
- (set root-nodes oldest-node newest-node window-filled? byte-buffer)))
-
-;; This does not seem to make much difference:
-(declare (ignore-reference-traps (set current-pointer current-bp command-bp)))
\f
;;; This compression program is based on the algorithm described in
;;; "Data Compression with Finite Windows", by Edward R. Fiala and
;;; the algorithms B1, B2, and C2. The encoder, which appears below,
;;; determines the algorithm.
\f
-(define input-port)
-(define output-port)
-
(define (compress ifile ofile)
(call-with-binary-input-file (merge-pathnames ifile)
(lambda (input)
(write-string "Compressed-B1-1.00" output)
(compress-ports input output))))))
+(begin
+(declare (no-range-checks) (no-type-checks))
+(define-structure (compression-state
+ (conc-name #f)
+ (constructor make-compression-state
+ (root-nodes byte-buffer output-buffer
+ input-port output-port)))
+ root-nodes
+ (oldest-node #f)
+ (newest-node #f)
+ (window-filled? #f)
+ (compress-continuation unspecific)
+ byte-buffer
+
+ ;; Current "pointer" in input stream. The pointer is updated at each
+ ;; literal character and copy command.
+ (current-pointer 0)
+
+ ;; Starting position of current command in byte buffer.
+ (current-bp 0)
+ (command-bp 0)
+
+ output-buffer
+ input-port
+ output-port))
+
(define (compress-ports input output)
- (fluid-let ((root-nodes (make-vector 256 false))
- (oldest-node false)
- (newest-node false)
- (window-filled? false)
- (compress-continuation)
- (byte-buffer (make-byte-buffer))
- (current-pointer 0)
- (current-bp 0)
- (command-bp 0)
- (output-buffer (make-output-buffer))
- (input-port input)
- (output-port output))
+ (let ((state (make-compression-state
+ (make-vector 256 false)
+ (make-byte-buffer)
+ (make-output-buffer)
+ input output)))
(call-with-current-continuation
(lambda (continuation)
- (set! compress-continuation continuation)
- (idle)))
- (flush-output-buffer)))
+ (set-compress-continuation! state continuation)
+ (idle state)))
+ (flush-output-buffer state)))
\f
-(define (idle)
+(define (idle state)
;; This is the top of the compression loop. We've just emitted a
;; command. If the next two bytes can be matched against some text
;; in the window, start a copy command, otherwise start a literal.
- (guarantee-buffer-space 2)
- (let ((node (match-first)))
+ (guarantee-buffer-space state 2)
+ (let ((node (match-first state)))
(if (not node)
- (generate-literal)
- (let ((node (match-next node 1)))
+ (generate-literal state)
+ (let ((node (match-next state node 1)))
(if (not node)
- (generate-literal)
- (generate-copy node 2))))))
+ (generate-literal state)
+ (generate-copy state node 2))))))
-(define (generate-literal)
- (guarantee-buffer-space (fix:+ literal-max 2))
+(define (generate-literal state)
+ (guarantee-buffer-space state (fix:+ literal-max 2))
(letrec
((loop
(lambda (nb)
- (let ((node (match-first)))
+ (let ((node (match-first state)))
(if (not node)
(continue nb)
- (let ((node (match-next node 1)))
+ (let ((node (match-next state node 1)))
(if (not node)
(continue nb)
- (let ((node (match-next node 2)))
+ (let ((node (match-next state node 2)))
(if (not node)
(begin
- (unread-byte)
+ (unread-byte state)
(continue nb))
(let ((nb*
- (let ((cbp current-bp)
+ (let ((cbp (current-bp state))
(nbp (node-bp node)))
(fix:- (if (fix:< cbp nbp)
(fix:+ cbp buffer-size)
;; would result in a copy that is
;; copying from itself.
(begin
- (unread-bytes 2)
+ (unread-bytes state 2)
(continue nb))
(begin
- (write-literal nb)
- (generate-copy node 3))))))))))))
+ (write-literal state nb)
+ (generate-copy state node 3))))))))))))
(continue
(lambda (nb)
- (increment-current-pointer)
- (increment-bp)
+ (increment-current-pointer state)
+ (increment-bp state)
(let ((nb (fix:+ nb 1)))
(if (fix:< nb literal-max)
(loop nb)
(begin
- (write-literal nb)
- (idle)))))))
- (increment-current-pointer)
- (increment-bp)
+ (write-literal state nb)
+ (idle state)))))))
+ (increment-current-pointer state)
+ (increment-bp state)
(loop 1)))
\f
-(define (generate-copy node nb)
- (guarantee-buffer-space copy-max)
- (let ((copy-pointer current-pointer))
+(define (generate-copy state node nb)
+ (guarantee-buffer-space state copy-max)
+ (let ((copy-pointer (current-pointer state)))
(let ((finish
(lambda (nb pointer bp)
(let ((nb*
- (fix:- (let ((bp* command-bp))
+ (fix:- (let ((bp* (command-bp state)))
(if (fix:< bp* bp)
(fix:+ bp* buffer-size)
bp*))
bp))
(do-copy
(lambda (nb)
- (write-copy nb pointer copy-pointer)
- (increment-current-pointer)
- (idle))))
+ (write-copy state nb pointer copy-pointer)
+ (increment-current-pointer state)
+ (idle state))))
;; NB is the number of bytes that we want to write a
;; copy command for; NB* is the number of bytes between
;; the start of the copy and the current position. If
(if (fix:<= nb nb*)
(do-copy nb)
(begin
- (unread-bytes (fix:- nb nb*))
+ (unread-bytes state (fix:- nb nb*))
(if (fix:= nb* 1)
- (generate-literal)
+ (generate-literal state)
(do-copy nb*))))))))
(let loop ((node node) (nb nb))
(let ((pointer (node-pointer node))
(bp (node-bp node)))
- (if (not (byte-ready?))
+ (if (not (byte-ready? state))
(finish nb pointer bp)
- (let ((node* (match-next node nb)))
+ (let ((node* (match-next state node nb)))
(if (not node*)
(finish nb pointer bp)
(let ((nb (fix:+ nb 1)))
(finish nb pointer bp)
(let ((pointer (node-pointer node*))
(bp (node-bp node*)))
- (update-node-pointer node*)
+ (update-node-pointer state node*)
(finish nb pointer bp)))))))))))))
-(define (match-first)
- (let ((byte (read-byte)))
- (let ((node (vector-ref root-nodes byte)))
+(define (match-first state)
+ (let ((byte (read-byte state)))
+ (let ((node (vector-ref (root-nodes state) byte)))
(if (not node)
- (add-child false byte (make-node 0)))
+ (add-child state false byte (make-node state 0)))
node)))
-(define (match-next node nb)
- (let ((byte (peek-byte)))
+(define (match-next state node nb)
+ (let ((byte (peek-byte state)))
(if (fix:= (node-nb node) nb)
(begin
- (update-node-pointer node)
+ (update-node-pointer state node)
(let loop ((child (node-children node)))
(cond ((not child)
- (add-child node byte (make-node 0))
+ (add-child state node byte (make-node state 0))
false)
((fix:= byte (node-byte child))
- (discard-byte)
+ (discard-byte state)
child)
(else
(loop (node-next child))))))
- (let ((byte* (node-ref node nb)))
+ (let ((byte* (node-ref node nb state)))
(if (fix:= byte byte*)
(begin
- (discard-byte)
+ (discard-byte state)
node)
(begin
- (let ((parent (make-node nb)))
- (replace-child node parent)
- (add-child parent byte* node)
- (add-child parent byte (make-node 0)))
+ (let ((parent (make-node state nb)))
+ (replace-child state node parent)
+ (add-child state parent byte* node)
+ (add-child state parent byte (make-node state 0)))
false))))))
\f
;;;; PATRICIA Tree Database
-(define root-nodes)
-(define oldest-node)
-(define newest-node)
-(define window-filled?)
-
-(define-structure (node (constructor %make-node (nb older)))
+(define-structure (node (constructor %make-node (nb older pointer bp)))
;; The parent of this node, or #F for a root node.
(parent false)
(newer false)
;; The command pointer for this node.
- (pointer current-pointer)
+ pointer
;; The byte pointer for this node.
- (bp current-bp))
-
-(define (make-node nb)
- (let ((node (%make-node nb newest-node)))
- (if newest-node
- (set-node-newer! newest-node node)
- (set! oldest-node node))
- (set! newest-node node)
+ bp)
+
+(define (make-node state nb)
+ (let ((node (%make-node nb (newest-node state)
+ (current-pointer state) (current-bp state))))
+ (if (newest-node state)
+ (set-node-newer! (newest-node state) node)
+ (set-oldest-node! state node))
+ (set-newest-node! state node)
node))
-(define (update-node-pointer node)
- (set-node-pointer! node current-pointer)
- (set-node-bp! node current-bp)
+(define (update-node-pointer state node)
+ (set-node-pointer! node (current-pointer state))
+ (set-node-bp! node (current-bp state))
(let ((older (node-older node))
(newer (node-newer node)))
(if newer
(set-node-older! newer older)
(if older
(set-node-newer! older newer)
- (set! oldest-node newer))
+ (set-oldest-node! state newer))
(set-node-newer! node false)
- (set-node-older! node newest-node)
- (set-node-newer! newest-node node)
- (set! newest-node node)
+ (set-node-older! node (newest-node state))
+ (set-node-newer! (newest-node state) node)
+ (set-newest-node! state node)
unspecific))))
-(define (add-child parent byte child)
+(define (add-child state parent byte child)
(set-node-parent! child parent)
(set-node-byte! child byte)
(if parent
(set-node-next! child sibling)
(if sibling (set-node-previous! sibling child))
(set-node-children! parent child))
- (vector-set! root-nodes byte child)))
+ (vector-set! (root-nodes state) byte child)))
\f
-(define (replace-child child child*)
+(define (replace-child state child child*)
(let ((parent (node-parent child))
(byte (node-byte child)))
(set-node-parent! child* parent)
(set-node-next! child* next)
(if next
(set-node-previous! next child*))))
- (vector-set! root-nodes byte child*))))
+ (vector-set! (root-nodes state) byte child*))))
-(define (set-oldest-node node pointer)
+(define (set-oldest state node pointer)
(let ((node
(do ((node node (node-newer node)))
((not (fix:= (node-pointer node) pointer)) node))))
(if (not (eq? node oldest-node))
(let ((older (node-older node)))
(set-node-older! node false)
- (set! oldest-node node)
+ (set-oldest-node! state node)
;; We don't have to do anything complicated to delete a node.
;; If the node has any children, we know that they are also
;; being deleted, because a descendant cannot be newer than
((not node))
(let ((parent (node-parent node)))
(cond ((not parent)
- (vector-set! root-nodes (node-byte node) false))
+ (vector-set! (root-nodes state) (node-byte node) false))
((node-nb parent)
- (delete-child parent node))))
+ (delete-child state parent node))))
(set-node-nb! node true))
unspecific))))
-(define (delete-child parent child)
+(define (delete-child state parent child)
(let ((previous (node-previous child))
(next (node-next child)))
(if next
;; If only one child remains, splice out PARENT.
(if (not (node-next child))
(begin
- (replace-child parent child)
+ (replace-child state parent child)
(let ((older (node-older parent))
(newer (node-newer parent)))
(if older
(set-node-newer! older newer))
(if newer
(set-node-older! newer older))
- (if (eq? parent oldest-node)
- (set! oldest-node newer))
- (if (eq? parent newest-node)
- (set! newest-node older))
+ (if (eq? parent (oldest-node state))
+ (set-oldest-node! state newer))
+ (if (eq? parent (newest-node state))
+ (set-newest-node! state older))
unspecific)))))
\f
;;;; The Byte Buffer
;;; integral multiple of this number.
(define-integrable buffer-read 4096)
-(define compress-continuation)
-(define byte-buffer)
-
(define-structure (bb (constructor make-byte-buffer ()))
(vector (make-string buffer-size) read-only true)
(ptr 0)
(end 0)
(eof? false))
-(define (byte-ready?)
- (let ((bb byte-buffer))
+(define (byte-ready? state)
+ (let ((bb (byte-buffer state)))
(if (fix:= (bb-ptr bb) (bb-end bb))
- (guarantee-buffer-data bb true)
+ (guarantee-buffer-data state bb true)
true)))
-(define (read-byte)
+(define (read-byte state)
;; Get a byte from the byte buffer. If we are reading bytes in the
;; process of generating a copy command, NODE is the current
;; position in the copy, otherwise it is #F. If we encounter EOF
;; while reading this byte, NODE is used to emit the final command.
- (let ((bb byte-buffer))
- (let ((byte (%peek-byte bb)))
+ (let ((bb (byte-buffer state)))
+ (let ((byte (%peek-byte state bb)))
(%discard-byte bb)
byte)))
-(define (peek-byte)
- (%peek-byte byte-buffer))
+(define (peek-byte state)
+ (%peek-byte state (byte-buffer state)))
-(define (discard-byte)
- (%discard-byte byte-buffer))
+(define (discard-byte state)
+ (%discard-byte (byte-buffer state)))
(declare (integrate-operator %peek-byte %discard-byte))
-(define (%peek-byte bb)
+(define (%peek-byte state bb)
(if (fix:= (bb-ptr bb) (bb-end bb))
- (guarantee-buffer-data bb false))
+ (guarantee-buffer-data state bb false))
(vector-8b-ref (bb-vector bb) (bb-ptr bb)))
(define (%discard-byte bb)
0
(fix:+ (bb-ptr bb) 1))))
-(define (unread-byte)
- (let ((bb byte-buffer))
+(define (unread-byte state)
+ (let ((bb (byte-buffer state)))
(set-bb-ptr! bb
(if (fix:= (bb-ptr bb) 0)
(fix:- buffer-size 1)
(fix:- (bb-ptr bb) 1)))))
-(define (unread-bytes nb)
- (let ((bb byte-buffer))
+(define (unread-bytes state nb)
+ (let ((bb (byte-buffer state)))
(set-bb-ptr! bb
(let ((ptr (fix:- (bb-ptr bb) nb)))
(if (fix:< ptr 0)
(fix:+ ptr buffer-size)
ptr)))))
-(define (node-ref node nb)
+(define (node-ref node nb state)
;; Read byte NB in the string for NODE.
- (vector-8b-ref (bb-vector byte-buffer)
+ (vector-8b-ref (bb-vector (byte-buffer state))
(let ((bp (fix:+ (node-bp node) nb)))
(if (fix:< bp buffer-size)
bp
(fix:- bp buffer-size)))))
\f
-(define (guarantee-buffer-data bb probe?)
+(define (guarantee-buffer-data state bb probe?)
;; We have read all of the bytes in the buffer, so it's time to get
;; some more. If PROBE? is false and we're at EOF, do a non-local
;; exit to finish the compression. If the last read was short, that
(if (bb-eof? bb)
(if probe?
false
- (compress-finished))
+ (compress-finished state))
(let* ((end (bb-end bb))
(end* (fix:+ end buffer-read)))
;; Calls to GUARANTEE-BUFFER-SPACE make sure that this read will
;; couldn't be sure that any nodes we were holding were valid
;; across a call to READ-BYTE.
(let ((nb
- (input-port/read-substring! input-port
+ (input-port/read-substring! (input-port state)
(bb-vector bb) end end*)))
(cond ((not nb)
- (error "Input port must be in blocking mode:" input-port)
+ (error "Input port must be in blocking mode:"
+ (input-port state))
false)
((fix:= nb buffer-read)
;; A full block was read.
(begin
(set-bb-eof?! bb true)
false)
- (compress-finished)))
+ (compress-finished state)))
((and (fix:< 0 nb) (fix:< nb buffer-read))
;; A partial block was read, meaning that
;; this is the last block. Set BB-EOF? to
(error "Illegal result from read:" nb buffer-read)
false))))))
-(define (compress-finished)
+(define (compress-finished state)
;; This is called from GUARANTEE-BUFFER-DATA when EOF is
;; encountered. If any data remains in the buffer which has not yet
;; been emitted as a literal or copy, it is emitted as a literal.
- (let ((bp command-bp)
- (ptr (bb-ptr byte-buffer)))
+ (let ((bp (command-bp state))
+ (ptr (bb-ptr (byte-buffer state))))
(if (not (fix:= ptr bp))
(let loop
((nb (fix:- (if (fix:< bp ptr) ptr (fix:+ ptr buffer-size)) bp)))
(if (fix:<= nb literal-max)
- (write-literal nb)
+ (write-literal state nb)
(begin
- (write-literal literal-max)
+ (write-literal state literal-max)
(loop (fix:- nb literal-max)))))))
- (compress-continuation unspecific))
+ ((compress-continuation state) unspecific))
\f
-(define (guarantee-buffer-space nb)
+(define (guarantee-buffer-space state nb)
;; Make sure that the byte buffer has enough space to hold NB bytes.
;; If necessary, invalidate old commands until this is true. If the
;; buffer size is optimal, this is never necessary, because the
;; buffer is big enough to hold all of the commands in the window.
- (declare (ignorable nb))
+ (declare (ignore state nb))
(if (and (not buffer-size-optimal?)
- oldest-node)
- (let ((end (bb-end byte-buffer)))
- (if (fix:< (let ((bp command-bp))
- (fix:- (if (fix:<= bp end)
- end
- (fix:+ end buffer-size))
- bp))
- nb)
- (let ((start (node-bp oldest-node))
+ oldest)
+ (let ((end (bb-end (byte-buffer state)))
+ (oldest (oldest-node state)))
+ (if (fix:< (let ((bp (command-bp state)))
+ (fix:- (if (fix:<= bp end)
+ end
+ (fix:+ end buffer-size))
+ bp))
+ nb)
+ (let ((start (node-bp oldest))
(nb (if (fix:< buffer-read nb) nb buffer-read)))
(if (fix:< (fix:- (if (fix:< end start)
- start
- (fix:+ start buffer-size))
- end)
- nb)
+ start
+ (fix:+ start buffer-size))
+ end)
+ nb)
(let ((node
(let ((end
(let ((end (fix:+ end nb)))
end
(fix:- end buffer-size)))))
(if (fix:< start end)
- (do ((node oldest-node (node-newer node)))
+ (do ((node oldest (node-newer node)))
((not
(let ((bp (node-bp node)))
(and (fix:<= start bp)
(fix:< bp end))))
node))
- (do ((node oldest-node (node-newer node)))
+ (do ((node oldest (node-newer node)))
((not
(let ((bp (node-bp node)))
(or (and (fix:<= start bp)
(fix:< bp buffer-size))
(fix:< bp end))))
node))))))
- (set-oldest-node node
- (node-pointer (node-older node))))))))))
+ (set-oldest state node
+ (node-pointer (node-older node))))))))))
\f
;;;; The Encoder
;;; This is the B1 encoder of Fiala and Greene.
;;; is the size of the compression window in pointers.
(define-integrable pointer-max 4096)
-;;; Current "pointer" in input stream. The pointer is updated at each
-;;; literal character and copy command.
-(define current-pointer)
-
-;;; Starting position of current command in byte buffer.
-(define current-bp)
-(define command-bp)
-
-(define (write-literal nb)
+(define (write-literal state nb)
;; Output a literal command of length NB, which is greater than zero
;; and at most LITERAL-MAX.
- (write-byte (fix:- nb 1))
- (let ((string (bb-vector byte-buffer))
- (start command-bp))
+ (write-byte state (fix:- nb 1))
+ (let ((string (bb-vector (byte-buffer state)))
+ (start (command-bp state)))
(let ((end (fix:+ start nb)))
(if (fix:<= end buffer-size)
(begin
- (write-bytes string start end)
- (set! command-bp (if (fix:= end buffer-size) 0 end)))
+ (write-bytes state string start end)
+ (set-command-bp! state (if (fix:= end buffer-size) 0 end)))
(let ((end (fix:- end buffer-size)))
- (write-bytes string start buffer-size)
- (write-bytes string 0 end)
- (set! command-bp end)))))
+ (write-bytes state string start buffer-size)
+ (write-bytes state string 0 end)
+ (set-command-bp! state end)))))
unspecific)
-(define (write-copy nb pointer copy-pointer)
+(define (write-copy state nb pointer copy-pointer)
;; Output a copy command of length NB, which is greater than one
;; and at most COPY-MAX. POINTER is the pointer of the text being
;; copied, while COPY-POINTER is the pointer of the copy command
pointer)))
(if (fix:< displacement 256)
(begin
- (write-byte length)
- (write-byte displacement))
+ (write-byte state length)
+ (write-byte state displacement))
(begin
- (write-byte (fix:+ length (fix:quotient displacement 256)))
- (write-byte (fix:remainder displacement 256)))))
+ (write-byte state (fix:+ length (fix:quotient displacement 256)))
+ (write-byte state (fix:remainder displacement 256)))))
(let ((bp
- (let ((bp (fix:+ current-bp nb)))
+ (let ((bp (fix:+ (current-bp state) nb)))
(if (fix:< bp buffer-size)
bp
(fix:- bp buffer-size)))))
- (set! current-bp bp)
- (set! command-bp bp))
+ (set-current-bp! state bp)
+ (set-command-bp! state bp))
unspecific)
\f
-(define (increment-bp)
- (set! current-bp
- (let ((bp (fix:+ current-bp 1)))
- (if (fix:= bp buffer-size)
- 0
- bp)))
+(define (increment-bp state)
+ (set-current-bp! state
+ (let ((bp (fix:+ (current-bp state) 1)))
+ (if (fix:= bp buffer-size)
+ 0
+ bp)))
unspecific)
-(define (increment-current-pointer)
+(define (increment-current-pointer state)
(let ((pointer
- (let ((pointer (fix:+ current-pointer 1)))
+ (let ((pointer (fix:+ (current-pointer state) 1)))
(if (fix:= pointer pointer-max)
(begin
- (set! window-filled? true)
+ (set-window-filled?! state true)
0)
pointer))))
- (set! current-pointer pointer)
+ (set-current-pointer! state pointer)
;; Invalidate any nodes that refer to the previous command with
;; number POINTER. If WINDOW-FILLED? is false, we haven't yet
;; generated enough commands for such nodes to exist.
- (if window-filled?
- (set-oldest-node oldest-node pointer))))
-
-(define output-buffer)
+ (if (window-filled? state)
+ (set-oldest state (oldest-node state) pointer))))
(define (make-output-buffer)
(cons 0 (make-string 4096)))
-(define (write-byte byte)
- (let ((ob output-buffer))
+(define (write-byte state byte)
+ (let ((ob (output-buffer state)))
(let ((index (car ob)))
(vector-8b-set! (cdr ob) index byte)
(if (fix:= index 4095)
(begin
- (output-port/write-string output-port (cdr ob))
+ (output-port/write-string (output-port state) (cdr ob))
(set-car! ob 0))
(set-car! ob (fix:+ index 1))))))
-(define (write-bytes string start end)
- (let ((ob output-buffer))
+(define (write-bytes state string start end)
+ (let ((ob (output-buffer state)))
(let ((index (car ob)))
(let ((new-index (fix:+ index (fix:- end start))))
(if (fix:< new-index 4096)
(set-car! ob new-index))
(do ((start start (fix:+ start 1)))
((fix:= start end))
- (write-byte (vector-8b-ref string start))))))))
+ (write-byte state (vector-8b-ref string start))))))))
-(define (flush-output-buffer)
- (let ((ob output-buffer))
+(define (flush-output-buffer state)
+ (let ((ob (output-buffer state))
+ (op (output-port state)))
(if (fix:< 0 (car ob))
- (output-port/write-substring output-port (cdr ob) 0 (car ob))))
- (output-port/flush-output output-port))
+ (output-port/write-substring op (cdr ob) 0 (car ob)))
+ (output-port/flush-output op)))
(define (uncompress ifile ofile)
(uncompress-internal ifile ofile