From: Chris Hanson Date: Tue, 13 Dec 2005 15:31:02 +0000 (+0000) Subject: Eliminate UTF-xx string ports; use corresponding coding on standard X-Git-Tag: 20090517-FFI~1164 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=4fe235a7bb1bb6ecc736619c2d645843e0f2b3b2;p=mit-scheme.git Eliminate UTF-xx string ports; use corresponding coding on standard string ports instead. --- diff --git a/v7/doc/ref-manual/characters.texi b/v7/doc/ref-manual/characters.texi index 91f04e6aa..7ba7f2420 100644 --- a/v7/doc/ref-manual/characters.texi +++ b/v7/doc/ref-manual/characters.texi @@ -1,5 +1,5 @@ @c This file is part of the MIT/GNU Scheme Reference Manual. -@c $Id: characters.texi,v 1.6 2004/10/15 05:23:31 cph Exp $ +@c $Id: characters.texi,v 1.7 2005/12/13 15:31:02 cph Exp $ @c Copyright 1991,1992,1993,1994,1995 Massachusetts Institute of Technology @c Copyright 1996,1997,1999,2000,2001 Massachusetts Institute of Technology @@ -866,30 +866,6 @@ procedures that implement @dfn{host-endian} order, which is either big-endian or little-endian depending on the underlying computer architecture. -@deffn procedure read-utf8-char port -@deffnx procedure read-utf16-be-char port -@deffnx procedure read-utf16-le-char port -@deffnx procedure read-utf16-char port -@deffnx procedure read-utf32-be-char port -@deffnx procedure read-utf32-le-char port -@deffnx procedure read-utf32-char port -Each of these procedures reads a single wide character from the given -@var{port}. @var{Port} is treated as a stream of bytes encoded in the -corresponding @samp{utfNN} representation. -@end deffn - -@deffn procedure write-utf8-char wide-char port -@deffnx procedure write-utf16-be-char wide-char port -@deffnx procedure write-utf16-le-char wide-char port -@deffnx procedure write-utf32-be-char wide-char port -@deffnx procedure write-utf32-le-char wide-char port -@deffnx procedure write-utf16-char wide-char port -@deffnx procedure write-utf32-char wide-char port -Each of these procedures writes @var{wide-char} to the given @var{port}. -@var{Wide-char} is encoded in the corresponding @samp{utfNN} -representation and written to @var{port} as a stream of bytes. -@end deffn - @deffn procedure utf8-string->wide-string string [start [end]] @deffnx procedure utf16-be-string->wide-string string [start [end]] @deffnx procedure utf16-le-string->wide-string string [start [end]] diff --git a/v7/src/runtime/runtime.pkg b/v7/src/runtime/runtime.pkg index 7317aa5fb..2d261ed61 100644 --- a/v7/src/runtime/runtime.pkg +++ b/v7/src/runtime/runtime.pkg @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: runtime.pkg,v 14.567 2005/12/12 21:48:29 cph Exp $ +$Id: runtime.pkg,v 14.568 2005/12/13 15:29:46 cph Exp $ Copyright 1988,1989,1990,1991,1992,1993 Massachusetts Institute of Technology Copyright 1994,1995,1996,1997,1998,1999 Massachusetts Institute of Technology @@ -4660,13 +4660,6 @@ USA. alphabet->code-points alphabet->string alphabet? - call-with-utf16-be-output-string - call-with-utf16-le-output-string - call-with-utf16-output-string - call-with-utf32-be-output-string - call-with-utf32-le-output-string - call-with-utf32-output-string - call-with-utf8-output-string call-with-wide-output-string char-in-alphabet? char-set->alphabet @@ -4700,29 +4693,8 @@ USA. guarantee-wide-string-index guarantee-wide-substring make-wide-string - open-utf16-be-input-string - open-utf16-be-output-string - open-utf16-input-string - open-utf16-le-input-string - open-utf16-le-output-string - open-utf16-output-string - open-utf32-be-input-string - open-utf32-be-output-string - open-utf32-input-string - open-utf32-le-input-string - open-utf32-le-output-string - open-utf32-output-string - open-utf8-input-string - open-utf8-output-string open-wide-input-string open-wide-output-string - read-utf16-be-char - read-utf16-char - read-utf16-le-char - read-utf32-be-char - read-utf32-char - read-utf32-le-char - read-utf8-char string->alphabet string->utf8-string string->wide-string @@ -4771,14 +4743,7 @@ USA. wide-string-ref wide-string-set! wide-string? - wide-substring - write-utf16-be-char - write-utf16-char - write-utf16-le-char - write-utf32-be-char - write-utf32-char - write-utf32-le-char - write-utf8-char) + wide-substring) (export (runtime parser-buffer) %wide-string-length %wide-string-ref @@ -4787,7 +4752,8 @@ USA. (export (runtime generic-i/o-port) wide-string-contents) (export (runtime input-port) - wide-string-contents)) + wide-string-contents) + (initialization (initialize-package!))) (define-package (runtime uri) (files "url") diff --git a/v7/src/runtime/unicode.scm b/v7/src/runtime/unicode.scm index 150dff5a2..73308ec42 100644 --- a/v7/src/runtime/unicode.scm +++ b/v7/src/runtime/unicode.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: unicode.scm,v 1.24 2005/12/09 07:06:23 riastradh Exp $ +$Id: unicode.scm,v 1.25 2005/12/13 15:29:52 cph Exp $ Copyright 2001,2003,2004,2005 Massachusetts Institute of Technology @@ -131,15 +131,6 @@ USA. (define (port->byte-sink port) (lambda (byte) (write-char (integer->char byte) port))) - -(define ((make-call-with-output-string open-output-string) generator) - (let ((port (open-output-string))) - (generator port) - (get-output-string port))) - -(define (initialize-package!) - (initialize-wide-ports!) - (initialize-utf-ports!)) ;;;; Unicode characters @@ -638,19 +629,6 @@ USA. ;;;; UTF-32 representation -(define (read-utf32-char port) - (if (host-big-endian?) - (read-utf32-be-char port) - (read-utf32-le-char port))) - -(define (read-utf32-be-char port) - (or (source-utf32-be-char (port->byte-source port) 'READ-UTF32-BE-CHAR) - (make-eof-object port))) - -(define (read-utf32-le-char port) - (or (source-utf32-le-char (port->byte-source port) 'READ-UTF32-LE-CHAR) - (make-eof-object port))) - (define (source-utf32-be-char source caller) (source-utf32-char source utf32-be-bytes->code-point caller)) @@ -682,19 +660,6 @@ USA. (fix:lsh b1 8) b0)) -(define (write-utf32-char char port) - (if (host-big-endian?) - (write-utf32-be-char char port) - (write-utf32-le-char char port))) - -(define (write-utf32-be-char char port) - (guarantee-wide-char char 'WRITE-UTF32-BE-CHAR) - (sink-utf32-be-char char (port->byte-sink port))) - -(define (write-utf32-le-char char port) - (guarantee-wide-char char 'WRITE-UTF32-LE-CHAR) - (sink-utf32-le-char char (port->byte-sink port))) - (define-integrable (sink-utf32-be-char char sink) (let ((pt (char->integer char))) (sink 0) @@ -810,19 +775,6 @@ USA. ;;;; UTF-16 representation -(define (read-utf16-char port) - (if (host-big-endian?) - (read-utf16-be-char port) - (read-utf16-le-char port))) - -(define (read-utf16-be-char port) - (or (source-utf16-be-char (port->byte-source port) 'READ-UTF16-BE-CHAR) - (make-eof-object port))) - -(define (read-utf16-le-char port) - (or (source-utf16-le-char (port->byte-source port) 'READ-UTF16-LE-CHAR) - (make-eof-object port))) - (define (source-utf16-be-char source caller) (source-utf16-char source be-bytes->digit16 caller)) @@ -853,19 +805,6 @@ USA. (error "Truncated UTF-16 input.")) (combinator b0 b1))))) -(define (write-utf16-char char port) - (if (host-big-endian?) - (write-utf16-be-char char port) - (write-utf16-le-char char port))) - -(define (write-utf16-be-char char port) - (guarantee-wide-char char 'WRITE-UTF16-BE-CHAR) - (sink-utf16-be-char char (port->byte-sink port))) - -(define (write-utf16-le-char char port) - (guarantee-wide-char char 'WRITE-UTF16-LE-CHAR) - (sink-utf16-le-char char (port->byte-sink port))) - (define-integrable (sink-utf16-be-char char sink) (sink-utf16-char char sink (lambda (digit sink) @@ -885,7 +824,7 @@ USA. (let ((s (fix:- pt #x10000))) (dissecter (fix:or #xD800 (fix:lsh s -10)) sink) (dissecter (fix:or #xDC00 (fix:and s #x3FF)) sink))))) - + (define (utf16-string->wide-string string #!optional start end) (utf-string->wide-string string start end (if (host-big-endian?) @@ -1009,10 +948,6 @@ USA. ;;;; UTF-8 representation -(define (read-utf8-char port) - (or (source-utf8-char (port->byte-source port) 'READ-UTF8-CHAR) - (make-eof-object port))) - (define (source-utf8-char source caller) (let ((b0 (source)) (get-next @@ -1049,10 +984,6 @@ USA. (utf-string->wide-string string start end source-utf8-char 'UTF8-STRING->WIDE-STRING)) - -(define (write-utf8-char char port) - (guarantee-wide-char char 'WRITE-UTF8-CHAR) - (sink-utf8-char char (port->byte-sink port))) (define (sink-utf8-char char sink) (let ((pt (char->integer char))) @@ -1078,7 +1009,7 @@ USA. (sink (subsequent-char 12)) (sink (subsequent-char 6)) (sink (subsequent-char 0)))))) - + (define (wide-string->utf8-string string #!optional start end) (wide-string->utf-string string start end sink-utf8-char @@ -1205,10 +1136,9 @@ USA. ;;;; Wide string ports (define open-wide-output-string) -(define call-with-wide-output-string) (define open-wide-input-string) -(define (initialize-wide-ports!) +(define (initialize-package!) (set! open-wide-output-string (let ((type (make-port-type @@ -1216,7 +1146,6 @@ USA. ,(lambda (port char) (guarantee-wide-char char 'WRITE-CHAR) ((port/state port) char) - ;; Return the number of characters written. 1)) (EXTRACT-OUTPUT ,(lambda (port) @@ -1233,8 +1162,6 @@ USA. #f))) (lambda () (make-port type (open-output-object-buffer))))) - (set! call-with-wide-output-string - (make-call-with-output-string open-wide-output-string)) (set! open-wide-input-string (let ((type (make-port-type @@ -1250,116 +1177,16 @@ USA. (lambda (string #!optional start end) (guarantee-wide-string string 'OPEN-WIDE-INPUT-STRING) (make-port type - (open-input-object-buffer - (wide-string-contents string) - start - end - 'OPEN-WIDE-INPUT-STRING))))) + (open-input-object-buffer (wide-string-contents string) + start + end + 'OPEN-WIDE-INPUT-STRING))))) unspecific) - -;;;; UTF-xx string ports - -(define open-utf8-input-string) -(define open-utf8-output-string) -(define call-with-utf8-output-string) -(define open-utf16-input-string) -(define open-utf16-output-string) -(define call-with-utf16-output-string) -(define open-utf16-be-input-string) -(define open-utf16-be-output-string) -(define call-with-utf16-be-output-string) -(define open-utf16-le-input-string) -(define open-utf16-le-output-string) -(define call-with-utf16-le-output-string) -(define open-utf32-input-string) -(define open-utf32-output-string) -(define call-with-utf32-output-string) -(define open-utf32-be-input-string) -(define open-utf32-be-output-string) -(define call-with-utf32-be-output-string) -(define open-utf32-le-input-string) -(define open-utf32-le-output-string) -(define call-with-utf32-le-output-string) - -(define (initialize-utf-ports!) - (let-syntax - ((define-openers - (sc-macro-transformer - (lambda (form environment) - (if (syntax-match? '(SYMBOL DATUM) (cdr form)) - (let ((root (cadr form)) - (name (caddr form)) - (sink - (lambda (root) - (symbol-append 'SINK- root '-CHAR))) - (source - (lambda (root) - (symbol-append 'SOURCE- root '-CHAR)))) - (let ((prim - (lambda (sink/source) - (if (memq root '(UTF16 UTF32)) - `(IF (HOST-BIG-ENDIAN?) - ,(sink/source (symbol-append root '-BE)) - ,(sink/source (symbol-append root '-LE))) - (sink/source root)))) - (n1 (symbol-append 'OPEN- root '-OUTPUT-STRING)) - (n2 (symbol-append 'CALL-WITH- root '-OUTPUT-STRING)) - (n3 (symbol-append 'OPEN- root '-INPUT-STRING))) - `(BEGIN - (SET! ,n1 - (MAKE-UTF-OUTPUT-OPENER ,name ,(prim sink))) - (SET! ,n2 - (MAKE-CALL-WITH-OUTPUT-STRING ,n1)) - (SET! ,n3 - (MAKE-UTF-INPUT-OPENER ,name ,(prim source)))))) - (ill-formed-syntax form)))))) - (define-openers utf8 "UTF-8") - (define-openers utf16 "UTF-16") - (define-openers utf16-be "UTF-16BE") - (define-openers utf16-le "UTF-16LE") - (define-openers utf32 "UTF-32") - (define-openers utf32-be "UTF-32BE") - (define-openers utf32-le "UTF-32LE") - unspecific)) - -(define (make-utf-output-opener coding-name sink-char) - (let ((type - (make-port-type - `((WRITE-CHAR - ,(lambda (port char) - (guarantee-wide-char char 'WRITE-CHAR) - (sink-char char (port/state port)) - 1)) - (EXTRACT-OUTPUT - ,(lambda (port) - (get-output-bytes (port/state port)))) - (EXTRACT-OUTPUT! - ,(lambda (port) - (get-output-bytes! (port/state port)))) - (WRITE-SELF - ,(let ((suffix (string-append " to " coding-name " string"))) - (lambda (port port*) - port - (write-string suffix port*))))) - #f))) - (lambda () - (make-port type (open-output-byte-buffer))))) - -(define (make-utf-input-opener coding-name source-char) - (let ((type - (make-port-type - `((READ-CHAR - ,(lambda (port) - (or (source-char (port/state port) 'READ-CHAR) - (make-eof-object port)))) - (WRITE-SELF - ,(let ((suffix (string-append " from " coding-name " string"))) - (lambda (port output-port) - port - (write-string suffix output-port))))) - #f))) - (lambda (bytes #!optional start end) - (make-port type (open-input-byte-buffer bytes start end #f))))) + +(define (call-with-wide-output-string generator) + (let ((port (open-wide-output-string))) + (generator port) + (get-output-string port))) (define (utf-string->wide-string string start end source-char caller) (let ((source (open-input-byte-buffer string start end caller))) diff --git a/v7/src/runtime/url.scm b/v7/src/runtime/url.scm index 34db6fcb4..19e74bbcd 100644 --- a/v7/src/runtime/url.scm +++ b/v7/src/runtime/url.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: url.scm,v 1.36 2005/09/08 18:51:30 cph Exp $ +$Id: url.scm,v 1.37 2005/12/13 15:29:58 cph Exp $ Copyright 2000,2001,2003,2004,2005 Massachusetts Institute of Technology @@ -425,9 +425,11 @@ USA. ;; works on ISO 8859-1 strings, and we are using UTF-8 strings. (define (uri-string-downcase string) - (call-with-utf8-output-string + (call-with-output-string (lambda (output) - (let ((input (open-utf8-input-string string))) + (port/set-coding output 'UTF-8) + (let ((input (open-input-string string))) + (port/set-coding input 'UTF-8) (let loop () (let ((char (read-char input))) (if (not (eof-object? char)) diff --git a/v7/src/xml/xml-names.scm b/v7/src/xml/xml-names.scm index 69025b783..e11ebd5d0 100644 --- a/v7/src/xml/xml-names.scm +++ b/v7/src/xml/xml-names.scm @@ -1,8 +1,8 @@ #| -*-Scheme-*- -$Id: xml-names.scm,v 1.8 2004/12/23 04:44:18 cph Exp $ +$Id: xml-names.scm,v 1.9 2005/12/13 15:30:28 cph Exp $ -Copyright 2003,2004 Massachusetts Institute of Technology +Copyright 2003,2004,2005 Massachusetts Institute of Technology This file is part of MIT/GNU Scheme. @@ -242,7 +242,10 @@ USA. (if (or (not c) (let ((i (fix:+ c 1)) (e (string-length s))) - (and (let ((char (read-utf8-char (open-input-string s i e)))) + (and (let ((char + (let ((port (open-input-string s i e))) + (port/set-coding port 'UTF-8) + (read-char port)))) (and (not (eof-object? char)) (not (char=? char #\:)) (char-in-alphabet? char alphabet:name-initial))) diff --git a/v7/src/xml/xml-output.scm b/v7/src/xml/xml-output.scm index ef288773e..93011c937 100644 --- a/v7/src/xml/xml-output.scm +++ b/v7/src/xml/xml-output.scm @@ -1,8 +1,8 @@ #| -*-Scheme-*- -$Id: xml-output.scm,v 1.35 2004/10/15 18:34:20 cph Exp $ +$Id: xml-output.scm,v 1.36 2005/12/13 15:30:33 cph Exp $ -Copyright 2001,2002,2003,2004 Massachusetts Institute of Technology +Copyright 2001,2002,2003,2004,2005 Massachusetts Institute of Technology This file is part of MIT/GNU Scheme. @@ -492,8 +492,9 @@ USA. (define (for-each-wide-char string procedure) (let ((port (open-input-string string))) + (port/set-coding port 'UTF-8) (let loop () - (let ((char (read-utf8-char port))) + (let ((char (read-char port))) (if (not (eof-object? char)) (begin (procedure char) diff --git a/v7/src/xml/xml-parser.scm b/v7/src/xml/xml-parser.scm index 6d7392dce..f54d8faed 100644 --- a/v7/src/xml/xml-parser.scm +++ b/v7/src/xml/xml-parser.scm @@ -1,8 +1,8 @@ #| -*-Scheme-*- -$Id: xml-parser.scm,v 1.64 2004/10/12 23:20:58 cph Exp $ +$Id: xml-parser.scm,v 1.65 2005/12/13 15:30:39 cph Exp $ -Copyright 2001,2002,2003,2004 Massachusetts Institute of Technology +Copyright 2001,2002,2003,2004,2005 Massachusetts Institute of Technology This file is part of MIT/GNU Scheme. @@ -683,7 +683,8 @@ USA. (perror p "Disallowed Unicode character" char)) (call-with-output-string (lambda (port) - (write-utf8-char char port)))))))) + (port/set-coding port 'UTF-8) + (write-char char port)))))))) (*parser (with-pointer p (sbracket "character reference" "&#" ";" diff --git a/v7/src/xml/xml-struct.scm b/v7/src/xml/xml-struct.scm index 9144aff22..e74cb18a7 100644 --- a/v7/src/xml/xml-struct.scm +++ b/v7/src/xml/xml-struct.scm @@ -1,8 +1,8 @@ #| -*-Scheme-*- -$Id: xml-struct.scm,v 1.47 2004/10/15 18:34:22 cph Exp $ +$Id: xml-struct.scm,v 1.48 2005/12/13 15:30:44 cph Exp $ -Copyright 2001,2002,2003,2004 Massachusetts Institute of Technology +Copyright 2001,2002,2003,2004,2005 Massachusetts Institute of Technology This file is part of MIT/GNU Scheme. @@ -161,7 +161,8 @@ USA. (cond ((wide-char? object) (call-with-output-string (lambda (port) - (write-utf8-char object port)))) + (port/set-coding port 'UTF-8) + (write-char object port)))) ((wide-string? object) (wide-string->utf8-string object)) ((and (string? object)