From eb936acecc9d820a1495f1bb41feb738867515bb Mon Sep 17 00:00:00 2001 From: Arthur Gleckler Date: Wed, 4 Oct 2006 05:51:55 +0000 Subject: [PATCH] Added `port-position' and `set-port-position!' operations on file I/O ports. The `port-position' procedure returns the offset, in bytes, from the beginning of the file. The `set-port-position!' procedure sets the position. Both operations work only on ports that are opened with binary normalizers or denormalizers, i.e. ones that make no transformation. We should consider lifting this restriction, but this is a useful addition even with it. When used with output ports, the port passed to these procedures must also be an input port. The `port-position' procedure works even after a character has been "unread", even for ports with non-single-byte character encodings. --- v7/src/runtime/fileio.scm | 52 +++++++++++++++++++++--- v7/src/runtime/genio.scm | 82 +++++++++++++++++++++++++++++++++++--- v7/src/runtime/port.scm | 8 +++- v7/src/runtime/runtime.pkg | 16 ++++++-- v7/src/runtime/strout.scm | 5 +-- 5 files changed, 144 insertions(+), 19 deletions(-) diff --git a/v7/src/runtime/fileio.scm b/v7/src/runtime/fileio.scm index c24f4e181..600d0ab63 100644 --- a/v7/src/runtime/fileio.scm +++ b/v7/src/runtime/fileio.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: fileio.scm,v 1.27 2005/12/14 05:44:31 cph Exp $ +$Id: fileio.scm,v 1.28 2006/10/04 05:51:55 savannah-arthur Exp $ Copyright 1991,1993,1994,1995,1996,1999 Massachusetts Institute of Technology Copyright 2001,2004,2005 Massachusetts Institute of Technology @@ -31,10 +31,12 @@ USA. (define (initialize-package!) (let ((other-operations - `((WRITE-SELF ,operation/write-self) - (LENGTH ,operation/length) + `((LENGTH ,operation/length) (PATHNAME ,operation/pathname) - (TRUENAME ,operation/truename)))) + (POSITION ,operation/position) + (SET-POSITION! ,operation/set-position!) + (TRUENAME ,operation/truename) + (WRITE-SELF ,operation/write-self)))) (let ((make-type (lambda (source sink) (make-port-type other-operations @@ -70,6 +72,46 @@ USA. (define (operation/write-self port output-port) (write-string " for file: " output-port) (write (->namestring (operation/truename port)) output-port)) + +(define (guarantee-input-port-using-binary-normalizer port) + (if (not (input-buffer-using-binary-normalizer? (port-input-buffer port))) + (error:wrong-type-datum port "port using binary normalizer"))) + +(define (guarantee-output-port-using-binary-denormalizer port) + (if (not (output-buffer-using-binary-denormalizer? (port-output-buffer port))) + (error:wrong-type-datum port "port using binary denormalizer"))) + +(define (operation/position port) + (guarantee-port port 'OPERATION/POSITION) + (if (output-port? port) + (begin + (guarantee-output-port-using-binary-denormalizer port) + (flush-output port) + (channel-file-position (port/output-channel port))) + (let ((input-buffer (port-input-buffer port))) + (guarantee-input-port-using-binary-normalizer port) + (- (channel-file-position (port/input-channel port)) + (input-buffer-free-bytes input-buffer) + (let ((unread-char (port/unread port))) + (if unread-char + ((input-buffer-compute-encoded-character-size input-buffer) + unread-char) + 0)))))) + +(define (operation/set-position! port position) + (guarantee-port port 'OPERATION/SET-POSITION!) + (guarantee-exact-nonnegative-integer position 'OPERATION/SET-POSITION!) + (guarantee-input-port port 'OPERATION/SET-POSITION!) + (cond ((output-port? port) + (guarantee-output-port-using-binary-denormalizer port) + (flush-output port) + (channel-file-set-position (port/output-channel port) + position)) + (else + (guarantee-input-port-using-binary-normalizer port) + (clear-input-buffer (port-input-buffer port)) + (channel-file-set-position (port/input-channel port) + position)))) (define (open-input-file filename) (let* ((pathname (merge-pathnames filename)) @@ -142,7 +184,7 @@ USA. (close-port port) value))) -(define call-with-input-file +(define call-with-input-file (make-call-with-file open-input-file)) (define call-with-binary-input-file diff --git a/v7/src/runtime/genio.scm b/v7/src/runtime/genio.scm index e375c7c94..383984b52 100644 --- a/v7/src/runtime/genio.scm +++ b/v7/src/runtime/genio.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: genio.scm,v 1.40 2006/08/29 03:48:57 cph Exp $ +$Id: genio.scm,v 1.41 2006/10/04 05:51:55 savannah-arthur Exp $ Copyright 1991,1993,1995,1996,1999,2002 Massachusetts Institute of Technology Copyright 2003,2004,2005,2006 Massachusetts Institute of Technology @@ -457,6 +457,7 @@ USA. (define-name-map decoder) (define-name-map encoder) +(define-name-map sizer) (define-name-map normalizer) (define-name-map denormalizer) @@ -528,16 +529,19 @@ USA. (ill-formed-syntax form)))))) (initialize-name-map decoder) (initialize-name-map encoder) + (initialize-name-map sizer) (initialize-name-map normalizer) (initialize-name-map denormalizer))) (set! binary-decoder (name->decoder 'ISO-8859-1)) (set! binary-encoder (name->encoder 'ISO-8859-1)) + (set! binary-sizer (name->sizer 'ISO-8859-1)) (set! binary-normalizer (name->normalizer 'BINARY)) (set! binary-denormalizer (name->denormalizer 'BINARY)) unspecific) (define binary-decoder) (define binary-encoder) +(define binary-sizer) (define binary-normalizer) (define binary-denormalizer) @@ -631,7 +635,8 @@ USA. start end decode - normalize) + normalize + compute-encoded-character-size) (define (make-input-buffer source coder-name normalizer-name) (%make-input-buffer source @@ -642,11 +647,16 @@ USA. (name->normalizer (line-ending ((source/get-channel source)) normalizer-name - #f)))) + #f)) + (name->sizer coder-name))) (define (input-buffer-open? ib) ((source/open? (input-buffer-source ib)))) +(define (clear-input-buffer ib) + (set-input-buffer-start! ib byte-buffer-length) + (set-input-buffer-end! ib byte-buffer-length)) + (define (close-input-buffer ib) (set-input-buffer-start! ib 0) (set-input-buffer-end! ib 0) @@ -664,6 +674,9 @@ USA. (define-integrable (input-buffer-byte-count ib) (fix:- (input-buffer-end ib) (input-buffer-start ib))) +(define (input-buffer-encoded-character-size ib char) + ((input-buffer-compute-encoded-character-size ib) char)) + (define (read-next-char ib) ((input-buffer-normalize ib) ib)) @@ -745,6 +758,10 @@ USA. (substring-move! contents 0 n bv 0) (set-input-buffer-start! ib 0) (set-input-buffer-end! ib n)))) + +(define (input-buffer-free-bytes ib) + (fix:- (input-buffer-end ib) + (input-buffer-start ib))) (define (read-substring:wide-string ib string start end) (let ((v (wide-string-contents string))) @@ -802,6 +819,9 @@ USA. (and (eq? (input-buffer-decode ib) binary-decoder) (eq? (input-buffer-normalize ib) binary-normalizer))) +(define (input-buffer-using-binary-normalizer? ib) + (eq? (input-buffer-normalize ib) binary-normalizer)) + (define (read-to-8-bit ib string start end) (let ((n (let loop ((i start)) @@ -921,6 +941,9 @@ USA. (and (eq? (output-buffer-encode ib) binary-encoder) (eq? (output-buffer-denormalize ib) binary-denormalizer))) +(define (output-buffer-using-binary-denormalizer? ib) + (eq? (output-buffer-denormalize ib) binary-denormalizer)) + (define (encode-char ob char) (set-output-buffer-start! ob @@ -1011,12 +1034,18 @@ USA. (vector-8b-set! (output-buffer-bytes ob) (output-buffer-start ob) cp) 1)) +(define-sizer 'ISO-8859-1 + (lambda (cp) 1)) + (define-decoder-alias 'BINARY 'ISO-8859-1) (define-encoder-alias 'BINARY 'ISO-8859-1) +(define-sizer-alias 'BINARY 'ISO-8859-1) (define-decoder-alias 'TEXT 'ISO-8859-1) (define-encoder-alias 'TEXT 'ISO-8859-1) +(define-sizer-alias 'TEXT 'ISO-8859-1) (define-decoder-alias 'US-ASCII 'ISO-8859-1) (define-encoder-alias 'ASCII 'ISO-8859-1) +(define-sizer-alias 'US-ASCII 'ISO-8859-1) (define-syntax define-8-bit-codecs (sc-macro-transformer @@ -1038,7 +1067,10 @@ USA. (DEFINE-ENCODER ',name (RECEIVE (LHS RHS) (REVERSE-ISO-8859-MAP ,start ',code-points) (LAMBDA (OB CP) - (ENCODE-8-BIT OB CP ,start LHS RHS)))))) + (ENCODE-8-BIT OB CP ,start LHS RHS)))) + (DEFINE-SIZER ',name + (LAMBDA (CP) + (SIZE-8-BIT CP))))) (ill-formed-syntax form))))) (define (decode-8-bit ib table) @@ -1074,6 +1106,9 @@ USA. (let ((lhs (make-vector n)) (rhs (make-vector-8b n))) (do ((alist (sort (let loop ((code-points code-points) (i start)) + +(define (size-iso-8859 cp) + 1) (if (pair? code-points) (if (car code-points) (cons (cons (car code-points) i) @@ -1425,6 +1460,14 @@ USA. #x0160 #x0143 #x0145 #x00d3 #x014c #x00d5 #x00d6 #x00d7 #x0172 #x0141 #x015a #x016a #x00dc #x017b #x017d #x00df #x0105 #x012f #x0101 #x0107 #x00e4 #x00e5 #x0119 #x0113 +(define-sizer 'UTF-8 + (lambda (cp) + (cond ((fix:< cp #x00000080) 1) + ((fix:< cp #x00000800) 2) + ((fix:< cp #x00010000) 3) + ((fix:< cp #x00110000) 4) + (else (error:char-encoding ob cp))))) + #x010d #x00e9 #x017a #x0117 #x0123 #x0137 #x012b #x013c #x0161 #x0144 #x0146 #x00f3 #x014d #x00f5 #x00f6 #x00f7 #x0173 #x0142 #x015b #x016b #x00fc #x017c #x017e #x02d9) @@ -1622,7 +1665,8 @@ USA. 'UTF-16BE 'UTF-16LE)))) (define-decoder-alias 'UTF-16 alias) - (define-encoder-alias 'UTF-16 alias)) + (define-encoder-alias 'UTF-16 alias) + (define-sizer-alias 'UTF-16 alias)) (define-decoder 'UTF-16BE (lambda (ib) @@ -1660,10 +1704,18 @@ USA. (lambda (ob cp) (encode-utf-16 ob cp high-byte low-byte))) +(define-sizer 'UTF-16BE + (lambda (cp) + (size-utf-16 cp))) + (define-encoder 'UTF-16LE (lambda (ob cp) (encode-utf-16 ob cp low-byte high-byte))) +(define-sizer 'UTF-16LE + (lambda (cp) + (size-utf-16 cp))) + (define-integrable (encode-utf-16 ob cp first-byte second-byte) (let ((bv (output-buffer-bytes ob)) (bs (output-buffer-start ob))) @@ -1682,6 +1734,11 @@ USA. (else (error:char-encoding ob cp))))) +(define-integrable (size-utf-16 cp) + (cond ((fix:< cp #x10000) 2) + ((fix:< cp #x110000) 4) + (else (error:char-encoding ob cp)))) + (define-integrable (be-bytes->digit16 b0 b1) (fix:or (fix:lsh b0 8) b1)) (define-integrable (le-bytes->digit16 b0 b1) (fix:or b0 (fix:lsh b1 8))) (define-integrable (high-byte d) (fix:lsh d -8)) @@ -1700,7 +1757,8 @@ USA. 'UTF-32BE 'UTF-32LE)))) (define-decoder-alias 'UTF-32 alias) - (define-encoder-alias 'UTF-32 alias)) + (define-encoder-alias 'UTF-32 alias) + (define-sizer-alias 'UTF-32 alias)) (define-decoder 'UTF-32BE (lambda (ib) @@ -1746,6 +1804,12 @@ USA. 4) (error:char-encoding ob cp)))) +(define-sizer 'UTF-32BE + (lambda (cp) + (if (fix:< cp #x110000) + 4 + (error:char-encoding ob cp)))) + (define-encoder 'UTF-32LE (lambda (ob cp) (if (fix:< cp #x110000) @@ -1757,6 +1821,12 @@ USA. (put-byte bv bs 3 #x00) 4) (error:char-encoding ob cp)))) + +(define-sizer 'UTF-32LE + (lambda (cp) + (if (fix:< cp #x110000) + 4 + (error:char-encoding ob cp)))) ;;;; Normalizers diff --git a/v7/src/runtime/port.scm b/v7/src/runtime/port.scm index 61f375e6e..00e79f4cb 100644 --- a/v7/src/runtime/port.scm +++ b/v7/src/runtime/port.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: port.scm,v 1.42 2006/02/24 17:42:50 cph Exp $ +$Id: port.scm,v 1.43 2006/10/04 05:51:55 savannah-arthur Exp $ Copyright 1991,1992,1993,1994,1997,1999 Massachusetts Institute of Technology Copyright 2001,2002,2003,2004,2005,2006 Massachusetts Institute of Technology @@ -569,6 +569,12 @@ USA. (define-port-operation fresh-line) (define-port-operation flush-output) (define-port-operation discretionary-flush-output)) + +(define (port-position port) + ((port/operation port 'POSITION) port)) + +(define (set-port-position! port position) + ((port/operation port 'SET-POSITION!) port position)) (set-record-type-unparser-method! (lambda (state port) diff --git a/v7/src/runtime/runtime.pkg b/v7/src/runtime/runtime.pkg index 107fa71a7..6234a96d9 100644 --- a/v7/src/runtime/runtime.pkg +++ b/v7/src/runtime/runtime.pkg @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: runtime.pkg,v 14.596 2006/10/02 04:18:15 cph Exp $ +$Id: runtime.pkg,v 14.597 2006/10/04 05:51:55 savannah-arthur Exp $ Copyright 1988,1989,1990,1991,1992,1993 Massachusetts Institute of Technology Copyright 1994,1995,1996,1997,1998,1999 Massachusetts Institute of Technology @@ -1744,7 +1744,14 @@ USA. set-input-buffer-contents!) (export (runtime file-i/o-port) generic-i/o-port-type - make-gstate) + clear-input-buffer + input-buffer-compute-encoded-character-size + input-buffer-free-space + input-buffer-using-binary-normalizer? + make-gstate + output-buffer-using-binary-denormalizer? + port-input-buffer + port-output-buffer) (export (runtime string-input) generic-i/o-port-type make-gstate @@ -1919,6 +1926,7 @@ USA. notification-output-port output-port-type? output-port? + port-position port-type/char-ready? port-type/discard-char port-type/discretionary-flush-output @@ -1976,6 +1984,7 @@ USA. set-current-output-port! set-interaction-i/o-port! set-notification-output-port! + set-port-position! set-port/state! set-trace-output-port! trace-output-port @@ -1984,6 +1993,8 @@ USA. with-notification-output-port with-output-to-port with-trace-output-port) + (export (runtime file-i/o-port) + port/unread) (export (runtime input-port) port/operation/char-ready? port/operation/discard-char @@ -4067,7 +4078,6 @@ USA. get-output-string! (make-accumulator-output-port open-output-string) open-output-string - port-position (with-string-output-port call-with-output-string) with-output-to-string) (initialization (initialize-package!))) diff --git a/v7/src/runtime/strout.scm b/v7/src/runtime/strout.scm index ebf3371f7..a72d9f78f 100644 --- a/v7/src/runtime/strout.scm +++ b/v7/src/runtime/strout.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: strout.scm,v 14.24 2006/08/09 05:48:53 savannah-arthur Exp $ +$Id: strout.scm,v 14.25 2006/10/04 05:51:55 savannah-arthur Exp $ Copyright 1988,1990,1993,1999,2000,2001 Massachusetts Institute of Technology Copyright 2003,2004,2005 Massachusetts Institute of Technology @@ -46,9 +46,6 @@ USA. (define (get-output-string! port) ((port/operation port 'EXTRACT-OUTPUT!) port)) -(define (port-position port) - ((port/operation port 'POSITION) port)) - (define (call-with-output-string generator) (let ((port (open-output-string))) (generator port) -- 2.25.1