From: Matt Birkholz Date: Thu, 24 Jul 2014 20:24:22 +0000 (-0700) Subject: Unfluidize (runtime compress) internals, e.g. root-nodes. X-Git-Tag: mit-scheme-pucked-9.2.12~402^2~9 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=333b810c86b66ff524d13829164ed9178eddc29f;p=mit-scheme.git Unfluidize (runtime compress) internals, e.g. root-nodes. This is the beginning of the end for fluid-let. Gather all of the fluid variables into a compression-state object. Pass it along as the first argument to many procedures. The result is approx. 10% slower, doing a lot of type and range checking where the original was skipping checks for reference-traps. A file-wide (declare (no-range-checks) (no-type-checks)) got that 10% back PLUS 15%. --- diff --git a/src/runtime/cpress.scm b/src/runtime/cpress.scm index 07efb7247..d1e9bf058 100644 --- a/src/runtime/cpress.scm +++ b/src/runtime/cpress.scm @@ -27,14 +27,6 @@ USA. ;;;; Data Compressor (declare (usual-integrations)) - -;; This declaration is worth up to 30% speedup -(declare - (ignore-reference-traps - (set root-nodes oldest-node newest-node window-filled? byte-buffer))) - -;; This does not seem to make much difference: -(declare (ignore-reference-traps (set current-pointer current-bp command-bp))) ;;; This compression program is based on the algorithm described in ;;; "Data Compression with Finite Windows", by Edward R. Fiala and @@ -93,9 +85,6 @@ USA. ;;; the algorithms B1, B2, and C2. The encoder, which appears below, ;;; determines the algorithm. -(define input-port) -(define output-port) - (define (compress ifile ofile) (call-with-binary-input-file (merge-pathnames ifile) (lambda (input) @@ -104,56 +93,75 @@ USA. (write-string "Compressed-B1-1.00" output) (compress-ports input output)))))) +(begin +(declare (no-range-checks) (no-type-checks)) +(define-structure (compression-state + (conc-name #f) + (constructor make-compression-state + (root-nodes byte-buffer output-buffer + input-port output-port))) + root-nodes + (oldest-node #f) + (newest-node #f) + (window-filled? #f) + (compress-continuation unspecific) + byte-buffer + + ;; Current "pointer" in input stream. The pointer is updated at each + ;; literal character and copy command. + (current-pointer 0) + + ;; Starting position of current command in byte buffer. + (current-bp 0) + (command-bp 0) + + output-buffer + input-port + output-port)) + (define (compress-ports input output) - (fluid-let ((root-nodes (make-vector 256 false)) - (oldest-node false) - (newest-node false) - (window-filled? false) - (compress-continuation) - (byte-buffer (make-byte-buffer)) - (current-pointer 0) - (current-bp 0) - (command-bp 0) - (output-buffer (make-output-buffer)) - (input-port input) - (output-port output)) + (let ((state (make-compression-state + (make-vector 256 false) + (make-byte-buffer) + (make-output-buffer) + input output))) (call-with-current-continuation (lambda (continuation) - (set! compress-continuation continuation) - (idle))) - (flush-output-buffer))) + (set-compress-continuation! state continuation) + (idle state))) + (flush-output-buffer state))) -(define (idle) +(define (idle state) ;; This is the top of the compression loop. We've just emitted a ;; command. If the next two bytes can be matched against some text ;; in the window, start a copy command, otherwise start a literal. - (guarantee-buffer-space 2) - (let ((node (match-first))) + (guarantee-buffer-space state 2) + (let ((node (match-first state))) (if (not node) - (generate-literal) - (let ((node (match-next node 1))) + (generate-literal state) + (let ((node (match-next state node 1))) (if (not node) - (generate-literal) - (generate-copy node 2)))))) + (generate-literal state) + (generate-copy state node 2)))))) -(define (generate-literal) - (guarantee-buffer-space (fix:+ literal-max 2)) +(define (generate-literal state) + (guarantee-buffer-space state (fix:+ literal-max 2)) (letrec ((loop (lambda (nb) - (let ((node (match-first))) + (let ((node (match-first state))) (if (not node) (continue nb) - (let ((node (match-next node 1))) + (let ((node (match-next state node 1))) (if (not node) (continue nb) - (let ((node (match-next node 2))) + (let ((node (match-next state node 2))) (if (not node) (begin - (unread-byte) + (unread-byte state) (continue nb)) (let ((nb* - (let ((cbp current-bp) + (let ((cbp (current-bp state)) (nbp (node-bp node))) (fix:- (if (fix:< cbp nbp) (fix:+ cbp buffer-size) @@ -164,41 +172,41 @@ USA. ;; would result in a copy that is ;; copying from itself. (begin - (unread-bytes 2) + (unread-bytes state 2) (continue nb)) (begin - (write-literal nb) - (generate-copy node 3)))))))))))) + (write-literal state nb) + (generate-copy state node 3)))))))))))) (continue (lambda (nb) - (increment-current-pointer) - (increment-bp) + (increment-current-pointer state) + (increment-bp state) (let ((nb (fix:+ nb 1))) (if (fix:< nb literal-max) (loop nb) (begin - (write-literal nb) - (idle))))))) - (increment-current-pointer) - (increment-bp) + (write-literal state nb) + (idle state))))))) + (increment-current-pointer state) + (increment-bp state) (loop 1))) -(define (generate-copy node nb) - (guarantee-buffer-space copy-max) - (let ((copy-pointer current-pointer)) +(define (generate-copy state node nb) + (guarantee-buffer-space state copy-max) + (let ((copy-pointer (current-pointer state))) (let ((finish (lambda (nb pointer bp) (let ((nb* - (fix:- (let ((bp* command-bp)) + (fix:- (let ((bp* (command-bp state))) (if (fix:< bp* bp) (fix:+ bp* buffer-size) bp*)) bp)) (do-copy (lambda (nb) - (write-copy nb pointer copy-pointer) - (increment-current-pointer) - (idle)))) + (write-copy state nb pointer copy-pointer) + (increment-current-pointer state) + (idle state)))) ;; NB is the number of bytes that we want to write a ;; copy command for; NB* is the number of bytes between ;; the start of the copy and the current position. If @@ -208,16 +216,16 @@ USA. (if (fix:<= nb nb*) (do-copy nb) (begin - (unread-bytes (fix:- nb nb*)) + (unread-bytes state (fix:- nb nb*)) (if (fix:= nb* 1) - (generate-literal) + (generate-literal state) (do-copy nb*)))))))) (let loop ((node node) (nb nb)) (let ((pointer (node-pointer node)) (bp (node-bp node))) - (if (not (byte-ready?)) + (if (not (byte-ready? state)) (finish nb pointer bp) - (let ((node* (match-next node nb))) + (let ((node* (match-next state node nb))) (if (not node*) (finish nb pointer bp) (let ((nb (fix:+ nb 1))) @@ -227,50 +235,45 @@ USA. (finish nb pointer bp) (let ((pointer (node-pointer node*)) (bp (node-bp node*))) - (update-node-pointer node*) + (update-node-pointer state node*) (finish nb pointer bp))))))))))))) -(define (match-first) - (let ((byte (read-byte))) - (let ((node (vector-ref root-nodes byte))) +(define (match-first state) + (let ((byte (read-byte state))) + (let ((node (vector-ref (root-nodes state) byte))) (if (not node) - (add-child false byte (make-node 0))) + (add-child state false byte (make-node state 0))) node))) -(define (match-next node nb) - (let ((byte (peek-byte))) +(define (match-next state node nb) + (let ((byte (peek-byte state))) (if (fix:= (node-nb node) nb) (begin - (update-node-pointer node) + (update-node-pointer state node) (let loop ((child (node-children node))) (cond ((not child) - (add-child node byte (make-node 0)) + (add-child state node byte (make-node state 0)) false) ((fix:= byte (node-byte child)) - (discard-byte) + (discard-byte state) child) (else (loop (node-next child)))))) - (let ((byte* (node-ref node nb))) + (let ((byte* (node-ref node nb state))) (if (fix:= byte byte*) (begin - (discard-byte) + (discard-byte state) node) (begin - (let ((parent (make-node nb))) - (replace-child node parent) - (add-child parent byte* node) - (add-child parent byte (make-node 0))) + (let ((parent (make-node state nb))) + (replace-child state node parent) + (add-child state parent byte* node) + (add-child state parent byte (make-node state 0))) false)))))) ;;;; PATRICIA Tree Database -(define root-nodes) -(define oldest-node) -(define newest-node) -(define window-filled?) - -(define-structure (node (constructor %make-node (nb older))) +(define-structure (node (constructor %make-node (nb older pointer bp))) ;; The parent of this node, or #F for a root node. (parent false) @@ -296,22 +299,23 @@ USA. (newer false) ;; The command pointer for this node. - (pointer current-pointer) + pointer ;; The byte pointer for this node. - (bp current-bp)) - -(define (make-node nb) - (let ((node (%make-node nb newest-node))) - (if newest-node - (set-node-newer! newest-node node) - (set! oldest-node node)) - (set! newest-node node) + bp) + +(define (make-node state nb) + (let ((node (%make-node nb (newest-node state) + (current-pointer state) (current-bp state)))) + (if (newest-node state) + (set-node-newer! (newest-node state) node) + (set-oldest-node! state node)) + (set-newest-node! state node) node)) -(define (update-node-pointer node) - (set-node-pointer! node current-pointer) - (set-node-bp! node current-bp) +(define (update-node-pointer state node) + (set-node-pointer! node (current-pointer state)) + (set-node-bp! node (current-bp state)) (let ((older (node-older node)) (newer (node-newer node))) (if newer @@ -319,14 +323,14 @@ USA. (set-node-older! newer older) (if older (set-node-newer! older newer) - (set! oldest-node newer)) + (set-oldest-node! state newer)) (set-node-newer! node false) - (set-node-older! node newest-node) - (set-node-newer! newest-node node) - (set! newest-node node) + (set-node-older! node (newest-node state)) + (set-node-newer! (newest-node state) node) + (set-newest-node! state node) unspecific)))) -(define (add-child parent byte child) +(define (add-child state parent byte child) (set-node-parent! child parent) (set-node-byte! child byte) (if parent @@ -334,9 +338,9 @@ USA. (set-node-next! child sibling) (if sibling (set-node-previous! sibling child)) (set-node-children! parent child)) - (vector-set! root-nodes byte child))) + (vector-set! (root-nodes state) byte child))) -(define (replace-child child child*) +(define (replace-child state child child*) (let ((parent (node-parent child)) (byte (node-byte child))) (set-node-parent! child* parent) @@ -352,16 +356,16 @@ USA. (set-node-next! child* next) (if next (set-node-previous! next child*)))) - (vector-set! root-nodes byte child*)))) + (vector-set! (root-nodes state) byte child*)))) -(define (set-oldest-node node pointer) +(define (set-oldest state node pointer) (let ((node (do ((node node (node-newer node))) ((not (fix:= (node-pointer node) pointer)) node)))) (if (not (eq? node oldest-node)) (let ((older (node-older node))) (set-node-older! node false) - (set! oldest-node node) + (set-oldest-node! state node) ;; We don't have to do anything complicated to delete a node. ;; If the node has any children, we know that they are also ;; being deleted, because a descendant cannot be newer than @@ -376,13 +380,13 @@ USA. ((not node)) (let ((parent (node-parent node))) (cond ((not parent) - (vector-set! root-nodes (node-byte node) false)) + (vector-set! (root-nodes state) (node-byte node) false)) ((node-nb parent) - (delete-child parent node)))) + (delete-child state parent node)))) (set-node-nb! node true)) unspecific)))) -(define (delete-child parent child) +(define (delete-child state parent child) (let ((previous (node-previous child)) (next (node-next child))) (if next @@ -394,17 +398,17 @@ USA. ;; If only one child remains, splice out PARENT. (if (not (node-next child)) (begin - (replace-child parent child) + (replace-child state parent child) (let ((older (node-older parent)) (newer (node-newer parent))) (if older (set-node-newer! older newer)) (if newer (set-node-older! newer older)) - (if (eq? parent oldest-node) - (set! oldest-node newer)) - (if (eq? parent newest-node) - (set! newest-node older)) + (if (eq? parent (oldest-node state)) + (set-oldest-node! state newer)) + (if (eq? parent (newest-node state)) + (set-newest-node! state older)) unspecific))))) ;;;; The Byte Buffer @@ -420,42 +424,39 @@ USA. ;;; integral multiple of this number. (define-integrable buffer-read 4096) -(define compress-continuation) -(define byte-buffer) - (define-structure (bb (constructor make-byte-buffer ())) (vector (make-string buffer-size) read-only true) (ptr 0) (end 0) (eof? false)) -(define (byte-ready?) - (let ((bb byte-buffer)) +(define (byte-ready? state) + (let ((bb (byte-buffer state))) (if (fix:= (bb-ptr bb) (bb-end bb)) - (guarantee-buffer-data bb true) + (guarantee-buffer-data state bb true) true))) -(define (read-byte) +(define (read-byte state) ;; Get a byte from the byte buffer. If we are reading bytes in the ;; process of generating a copy command, NODE is the current ;; position in the copy, otherwise it is #F. If we encounter EOF ;; while reading this byte, NODE is used to emit the final command. - (let ((bb byte-buffer)) - (let ((byte (%peek-byte bb))) + (let ((bb (byte-buffer state))) + (let ((byte (%peek-byte state bb))) (%discard-byte bb) byte))) -(define (peek-byte) - (%peek-byte byte-buffer)) +(define (peek-byte state) + (%peek-byte state (byte-buffer state))) -(define (discard-byte) - (%discard-byte byte-buffer)) +(define (discard-byte state) + (%discard-byte (byte-buffer state))) (declare (integrate-operator %peek-byte %discard-byte)) -(define (%peek-byte bb) +(define (%peek-byte state bb) (if (fix:= (bb-ptr bb) (bb-end bb)) - (guarantee-buffer-data bb false)) + (guarantee-buffer-data state bb false)) (vector-8b-ref (bb-vector bb) (bb-ptr bb))) (define (%discard-byte bb) @@ -464,30 +465,30 @@ USA. 0 (fix:+ (bb-ptr bb) 1)))) -(define (unread-byte) - (let ((bb byte-buffer)) +(define (unread-byte state) + (let ((bb (byte-buffer state))) (set-bb-ptr! bb (if (fix:= (bb-ptr bb) 0) (fix:- buffer-size 1) (fix:- (bb-ptr bb) 1))))) -(define (unread-bytes nb) - (let ((bb byte-buffer)) +(define (unread-bytes state nb) + (let ((bb (byte-buffer state))) (set-bb-ptr! bb (let ((ptr (fix:- (bb-ptr bb) nb))) (if (fix:< ptr 0) (fix:+ ptr buffer-size) ptr))))) -(define (node-ref node nb) +(define (node-ref node nb state) ;; Read byte NB in the string for NODE. - (vector-8b-ref (bb-vector byte-buffer) + (vector-8b-ref (bb-vector (byte-buffer state)) (let ((bp (fix:+ (node-bp node) nb))) (if (fix:< bp buffer-size) bp (fix:- bp buffer-size))))) -(define (guarantee-buffer-data bb probe?) +(define (guarantee-buffer-data state bb probe?) ;; We have read all of the bytes in the buffer, so it's time to get ;; some more. If PROBE? is false and we're at EOF, do a non-local ;; exit to finish the compression. If the last read was short, that @@ -495,7 +496,7 @@ USA. (if (bb-eof? bb) (if probe? false - (compress-finished)) + (compress-finished state)) (let* ((end (bb-end bb)) (end* (fix:+ end buffer-read))) ;; Calls to GUARANTEE-BUFFER-SPACE make sure that this read will @@ -505,10 +506,11 @@ USA. ;; couldn't be sure that any nodes we were holding were valid ;; across a call to READ-BYTE. (let ((nb - (input-port/read-substring! input-port + (input-port/read-substring! (input-port state) (bb-vector bb) end end*))) (cond ((not nb) - (error "Input port must be in blocking mode:" input-port) + (error "Input port must be in blocking mode:" + (input-port state)) false) ((fix:= nb buffer-read) ;; A full block was read. @@ -520,7 +522,7 @@ USA. (begin (set-bb-eof?! bb true) false) - (compress-finished))) + (compress-finished state))) ((and (fix:< 0 nb) (fix:< nb buffer-read)) ;; A partial block was read, meaning that ;; this is the last block. Set BB-EOF? to @@ -533,44 +535,45 @@ USA. (error "Illegal result from read:" nb buffer-read) false)))))) -(define (compress-finished) +(define (compress-finished state) ;; This is called from GUARANTEE-BUFFER-DATA when EOF is ;; encountered. If any data remains in the buffer which has not yet ;; been emitted as a literal or copy, it is emitted as a literal. - (let ((bp command-bp) - (ptr (bb-ptr byte-buffer))) + (let ((bp (command-bp state)) + (ptr (bb-ptr (byte-buffer state)))) (if (not (fix:= ptr bp)) (let loop ((nb (fix:- (if (fix:< bp ptr) ptr (fix:+ ptr buffer-size)) bp))) (if (fix:<= nb literal-max) - (write-literal nb) + (write-literal state nb) (begin - (write-literal literal-max) + (write-literal state literal-max) (loop (fix:- nb literal-max))))))) - (compress-continuation unspecific)) + ((compress-continuation state) unspecific)) -(define (guarantee-buffer-space nb) +(define (guarantee-buffer-space state nb) ;; Make sure that the byte buffer has enough space to hold NB bytes. ;; If necessary, invalidate old commands until this is true. If the ;; buffer size is optimal, this is never necessary, because the ;; buffer is big enough to hold all of the commands in the window. - (declare (ignorable nb)) + (declare (ignore state nb)) (if (and (not buffer-size-optimal?) - oldest-node) - (let ((end (bb-end byte-buffer))) - (if (fix:< (let ((bp command-bp)) - (fix:- (if (fix:<= bp end) - end - (fix:+ end buffer-size)) - bp)) - nb) - (let ((start (node-bp oldest-node)) + oldest) + (let ((end (bb-end (byte-buffer state))) + (oldest (oldest-node state))) + (if (fix:< (let ((bp (command-bp state))) + (fix:- (if (fix:<= bp end) + end + (fix:+ end buffer-size)) + bp)) + nb) + (let ((start (node-bp oldest)) (nb (if (fix:< buffer-read nb) nb buffer-read))) (if (fix:< (fix:- (if (fix:< end start) - start - (fix:+ start buffer-size)) - end) - nb) + start + (fix:+ start buffer-size)) + end) + nb) (let ((node (let ((end (let ((end (fix:+ end nb))) @@ -578,21 +581,21 @@ USA. end (fix:- end buffer-size))))) (if (fix:< start end) - (do ((node oldest-node (node-newer node))) + (do ((node oldest (node-newer node))) ((not (let ((bp (node-bp node))) (and (fix:<= start bp) (fix:< bp end)))) node)) - (do ((node oldest-node (node-newer node))) + (do ((node oldest (node-newer node))) ((not (let ((bp (node-bp node))) (or (and (fix:<= start bp) (fix:< bp buffer-size)) (fix:< bp end)))) node)))))) - (set-oldest-node node - (node-pointer (node-older node)))))))))) + (set-oldest state node + (node-pointer (node-older node)))))))))) ;;;; The Encoder ;;; This is the B1 encoder of Fiala and Greene. @@ -607,32 +610,24 @@ USA. ;;; is the size of the compression window in pointers. (define-integrable pointer-max 4096) -;;; Current "pointer" in input stream. The pointer is updated at each -;;; literal character and copy command. -(define current-pointer) - -;;; Starting position of current command in byte buffer. -(define current-bp) -(define command-bp) - -(define (write-literal nb) +(define (write-literal state nb) ;; Output a literal command of length NB, which is greater than zero ;; and at most LITERAL-MAX. - (write-byte (fix:- nb 1)) - (let ((string (bb-vector byte-buffer)) - (start command-bp)) + (write-byte state (fix:- nb 1)) + (let ((string (bb-vector (byte-buffer state))) + (start (command-bp state))) (let ((end (fix:+ start nb))) (if (fix:<= end buffer-size) (begin - (write-bytes string start end) - (set! command-bp (if (fix:= end buffer-size) 0 end))) + (write-bytes state string start end) + (set-command-bp! state (if (fix:= end buffer-size) 0 end))) (let ((end (fix:- end buffer-size))) - (write-bytes string start buffer-size) - (write-bytes string 0 end) - (set! command-bp end))))) + (write-bytes state string start buffer-size) + (write-bytes state string 0 end) + (set-command-bp! state end))))) unspecific) -(define (write-copy nb pointer copy-pointer) +(define (write-copy state nb pointer copy-pointer) ;; Output a copy command of length NB, which is greater than one ;; and at most COPY-MAX. POINTER is the pointer of the text being ;; copied, while COPY-POINTER is the pointer of the copy command @@ -645,60 +640,58 @@ USA. pointer))) (if (fix:< displacement 256) (begin - (write-byte length) - (write-byte displacement)) + (write-byte state length) + (write-byte state displacement)) (begin - (write-byte (fix:+ length (fix:quotient displacement 256))) - (write-byte (fix:remainder displacement 256))))) + (write-byte state (fix:+ length (fix:quotient displacement 256))) + (write-byte state (fix:remainder displacement 256))))) (let ((bp - (let ((bp (fix:+ current-bp nb))) + (let ((bp (fix:+ (current-bp state) nb))) (if (fix:< bp buffer-size) bp (fix:- bp buffer-size))))) - (set! current-bp bp) - (set! command-bp bp)) + (set-current-bp! state bp) + (set-command-bp! state bp)) unspecific) -(define (increment-bp) - (set! current-bp - (let ((bp (fix:+ current-bp 1))) - (if (fix:= bp buffer-size) - 0 - bp))) +(define (increment-bp state) + (set-current-bp! state + (let ((bp (fix:+ (current-bp state) 1))) + (if (fix:= bp buffer-size) + 0 + bp))) unspecific) -(define (increment-current-pointer) +(define (increment-current-pointer state) (let ((pointer - (let ((pointer (fix:+ current-pointer 1))) + (let ((pointer (fix:+ (current-pointer state) 1))) (if (fix:= pointer pointer-max) (begin - (set! window-filled? true) + (set-window-filled?! state true) 0) pointer)))) - (set! current-pointer pointer) + (set-current-pointer! state pointer) ;; Invalidate any nodes that refer to the previous command with ;; number POINTER. If WINDOW-FILLED? is false, we haven't yet ;; generated enough commands for such nodes to exist. - (if window-filled? - (set-oldest-node oldest-node pointer)))) - -(define output-buffer) + (if (window-filled? state) + (set-oldest state (oldest-node state) pointer)))) (define (make-output-buffer) (cons 0 (make-string 4096))) -(define (write-byte byte) - (let ((ob output-buffer)) +(define (write-byte state byte) + (let ((ob (output-buffer state))) (let ((index (car ob))) (vector-8b-set! (cdr ob) index byte) (if (fix:= index 4095) (begin - (output-port/write-string output-port (cdr ob)) + (output-port/write-string (output-port state) (cdr ob)) (set-car! ob 0)) (set-car! ob (fix:+ index 1)))))) -(define (write-bytes string start end) - (let ((ob output-buffer)) +(define (write-bytes state string start end) + (let ((ob (output-buffer state))) (let ((index (car ob))) (let ((new-index (fix:+ index (fix:- end start)))) (if (fix:< new-index 4096) @@ -711,13 +704,14 @@ USA. (set-car! ob new-index)) (do ((start start (fix:+ start 1))) ((fix:= start end)) - (write-byte (vector-8b-ref string start)))))))) + (write-byte state (vector-8b-ref string start)))))))) -(define (flush-output-buffer) - (let ((ob output-buffer)) +(define (flush-output-buffer state) + (let ((ob (output-buffer state)) + (op (output-port state))) (if (fix:< 0 (car ob)) - (output-port/write-substring output-port (cdr ob) 0 (car ob)))) - (output-port/flush-output output-port)) + (output-port/write-substring op (cdr ob) 0 (car ob))) + (output-port/flush-output op))) (define (uncompress ifile ofile) (uncompress-internal ifile ofile