Eliminate UTF-xx string ports; use corresponding coding on standard
authorChris Hanson <org/chris-hanson/cph>
Tue, 13 Dec 2005 15:31:02 +0000 (15:31 +0000)
committerChris Hanson <org/chris-hanson/cph>
Tue, 13 Dec 2005 15:31:02 +0000 (15:31 +0000)
string ports instead.

v7/doc/ref-manual/characters.texi
v7/src/runtime/runtime.pkg
v7/src/runtime/unicode.scm
v7/src/runtime/url.scm
v7/src/xml/xml-names.scm
v7/src/xml/xml-output.scm
v7/src/xml/xml-parser.scm
v7/src/xml/xml-struct.scm

index 91f04e6aac860538e99261c1a88cf1fcc34c5b8f..7ba7f2420ef5f9ba8dd2bd302d1b355f3df89c1c 100644 (file)
@@ -1,5 +1,5 @@
 @c This file is part of the MIT/GNU Scheme Reference Manual.
-@c $Id: characters.texi,v 1.6 2004/10/15 05:23:31 cph Exp $
+@c $Id: characters.texi,v 1.7 2005/12/13 15:31:02 cph Exp $
 
 @c Copyright 1991,1992,1993,1994,1995 Massachusetts Institute of Technology
 @c Copyright 1996,1997,1999,2000,2001 Massachusetts Institute of Technology
@@ -866,30 +866,6 @@ procedures that implement @dfn{host-endian} order, which is either
 big-endian or little-endian depending on the underlying computer
 architecture.
 
-@deffn procedure read-utf8-char port
-@deffnx procedure read-utf16-be-char port
-@deffnx procedure read-utf16-le-char port
-@deffnx procedure read-utf16-char port
-@deffnx procedure read-utf32-be-char port
-@deffnx procedure read-utf32-le-char port
-@deffnx procedure read-utf32-char port
-Each of these procedures reads a single wide character from the given
-@var{port}.  @var{Port} is treated as a stream of bytes encoded in the
-corresponding @samp{utfNN} representation.
-@end deffn
-
-@deffn procedure write-utf8-char wide-char port
-@deffnx procedure write-utf16-be-char wide-char port
-@deffnx procedure write-utf16-le-char wide-char port
-@deffnx procedure write-utf32-be-char wide-char port
-@deffnx procedure write-utf32-le-char wide-char port
-@deffnx procedure write-utf16-char wide-char port
-@deffnx procedure write-utf32-char wide-char port
-Each of these procedures writes @var{wide-char} to the given @var{port}.
-@var{Wide-char} is encoded in the corresponding @samp{utfNN}
-representation and written to @var{port} as a stream of bytes.
-@end deffn
-
 @deffn procedure utf8-string->wide-string string [start [end]]
 @deffnx procedure utf16-be-string->wide-string string [start [end]]
 @deffnx procedure utf16-le-string->wide-string string [start [end]]
index 7317aa5fb9c530e1486d9de975ca7e157901dd20..2d261ed61a36d30f302dd35f93e8f43df3bf1eda 100644 (file)
@@ -1,6 +1,6 @@
 #| -*-Scheme-*-
 
-$Id: runtime.pkg,v 14.567 2005/12/12 21:48:29 cph Exp $
+$Id: runtime.pkg,v 14.568 2005/12/13 15:29:46 cph Exp $
 
 Copyright 1988,1989,1990,1991,1992,1993 Massachusetts Institute of Technology
 Copyright 1994,1995,1996,1997,1998,1999 Massachusetts Institute of Technology
@@ -4660,13 +4660,6 @@ USA.
          alphabet->code-points
          alphabet->string
          alphabet?
-         call-with-utf16-be-output-string
-         call-with-utf16-le-output-string
-         call-with-utf16-output-string
-         call-with-utf32-be-output-string
-         call-with-utf32-le-output-string
-         call-with-utf32-output-string
-         call-with-utf8-output-string
          call-with-wide-output-string
          char-in-alphabet?
          char-set->alphabet
@@ -4700,29 +4693,8 @@ USA.
          guarantee-wide-string-index
          guarantee-wide-substring
          make-wide-string
-         open-utf16-be-input-string
-         open-utf16-be-output-string
-         open-utf16-input-string
-         open-utf16-le-input-string
-         open-utf16-le-output-string
-         open-utf16-output-string
-         open-utf32-be-input-string
-         open-utf32-be-output-string
-         open-utf32-input-string
-         open-utf32-le-input-string
-         open-utf32-le-output-string
-         open-utf32-output-string
-         open-utf8-input-string
-         open-utf8-output-string
          open-wide-input-string
          open-wide-output-string
-         read-utf16-be-char
-         read-utf16-char
-         read-utf16-le-char
-         read-utf32-be-char
-         read-utf32-char
-         read-utf32-le-char
-         read-utf8-char
          string->alphabet
          string->utf8-string
          string->wide-string
@@ -4771,14 +4743,7 @@ USA.
          wide-string-ref
          wide-string-set!
          wide-string?
-         wide-substring
-         write-utf16-be-char
-         write-utf16-char
-         write-utf16-le-char
-         write-utf32-be-char
-         write-utf32-char
-         write-utf32-le-char
-         write-utf8-char)
+         wide-substring)
   (export (runtime parser-buffer)
          %wide-string-length
          %wide-string-ref
@@ -4787,7 +4752,8 @@ USA.
   (export (runtime generic-i/o-port)
          wide-string-contents)
   (export (runtime input-port)
-         wide-string-contents))
+         wide-string-contents)
+  (initialization (initialize-package!)))
 
 (define-package (runtime uri)
   (files "url")
index 150dff5a2485abb3f086d583dbde8bca58aced9d..73308ec42346e06bbdb4a300ef6bf6c8791c5bed 100644 (file)
@@ -1,6 +1,6 @@
 #| -*-Scheme-*-
 
-$Id: unicode.scm,v 1.24 2005/12/09 07:06:23 riastradh Exp $
+$Id: unicode.scm,v 1.25 2005/12/13 15:29:52 cph Exp $
 
 Copyright 2001,2003,2004,2005 Massachusetts Institute of Technology
 
@@ -131,15 +131,6 @@ USA.
 (define (port->byte-sink port)
   (lambda (byte)
     (write-char (integer->char byte) port)))
-
-(define ((make-call-with-output-string open-output-string) generator)
-  (let ((port (open-output-string)))
-    (generator port)
-    (get-output-string port)))
-
-(define (initialize-package!)
-  (initialize-wide-ports!)
-  (initialize-utf-ports!))
 \f
 ;;;; Unicode characters
 
@@ -638,19 +629,6 @@ USA.
 \f
 ;;;; UTF-32 representation
 
-(define (read-utf32-char port)
-  (if (host-big-endian?)
-      (read-utf32-be-char port)
-      (read-utf32-le-char port)))
-
-(define (read-utf32-be-char port)
-  (or (source-utf32-be-char (port->byte-source port) 'READ-UTF32-BE-CHAR)
-      (make-eof-object port)))
-
-(define (read-utf32-le-char port)
-  (or (source-utf32-le-char (port->byte-source port) 'READ-UTF32-LE-CHAR)
-      (make-eof-object port)))
-
 (define (source-utf32-be-char source caller)
   (source-utf32-char source utf32-be-bytes->code-point caller))
 
@@ -682,19 +660,6 @@ USA.
      (fix:lsh b1 8)
      b0))
 
-(define (write-utf32-char char port)
-  (if (host-big-endian?)
-      (write-utf32-be-char char port)
-      (write-utf32-le-char char port)))
-
-(define (write-utf32-be-char char port)
-  (guarantee-wide-char char 'WRITE-UTF32-BE-CHAR)
-  (sink-utf32-be-char char (port->byte-sink port)))
-
-(define (write-utf32-le-char char port)
-  (guarantee-wide-char char 'WRITE-UTF32-LE-CHAR)
-  (sink-utf32-le-char char (port->byte-sink port)))
-\f
 (define-integrable (sink-utf32-be-char char sink)
   (let ((pt (char->integer char)))
     (sink 0)
@@ -810,19 +775,6 @@ USA.
 \f
 ;;;; UTF-16 representation
 
-(define (read-utf16-char port)
-  (if (host-big-endian?)
-      (read-utf16-be-char port)
-      (read-utf16-le-char port)))
-
-(define (read-utf16-be-char port)
-  (or (source-utf16-be-char (port->byte-source port) 'READ-UTF16-BE-CHAR)
-      (make-eof-object port)))
-
-(define (read-utf16-le-char port)
-  (or (source-utf16-le-char (port->byte-source port) 'READ-UTF16-LE-CHAR)
-      (make-eof-object port)))
-
 (define (source-utf16-be-char source caller)
   (source-utf16-char source be-bytes->digit16 caller))
 
@@ -853,19 +805,6 @@ USA.
               (error "Truncated UTF-16 input."))
           (combinator b0 b1)))))
 
-(define (write-utf16-char char port)
-  (if (host-big-endian?)
-      (write-utf16-be-char char port)
-      (write-utf16-le-char char port)))
-
-(define (write-utf16-be-char char port)
-  (guarantee-wide-char char 'WRITE-UTF16-BE-CHAR)
-  (sink-utf16-be-char char (port->byte-sink port)))
-
-(define (write-utf16-le-char char port)
-  (guarantee-wide-char char 'WRITE-UTF16-LE-CHAR)
-  (sink-utf16-le-char char (port->byte-sink port)))
-\f
 (define-integrable (sink-utf16-be-char char sink)
   (sink-utf16-char char sink
                   (lambda (digit sink)
@@ -885,7 +824,7 @@ USA.
        (let ((s (fix:- pt #x10000)))
          (dissecter (fix:or #xD800 (fix:lsh s -10)) sink)
          (dissecter (fix:or #xDC00 (fix:and s #x3FF)) sink)))))
-
+\f
 (define (utf16-string->wide-string string #!optional start end)
   (utf-string->wide-string string start end
                           (if (host-big-endian?)
@@ -1009,10 +948,6 @@ USA.
 \f
 ;;;; UTF-8 representation
 
-(define (read-utf8-char port)
-  (or (source-utf8-char (port->byte-source port) 'READ-UTF8-CHAR)
-      (make-eof-object port)))
-
 (define (source-utf8-char source caller)
   (let ((b0 (source))
        (get-next
@@ -1049,10 +984,6 @@ USA.
   (utf-string->wide-string string start end
                           source-utf8-char
                           'UTF8-STRING->WIDE-STRING))
-\f
-(define (write-utf8-char char port)
-  (guarantee-wide-char char 'WRITE-UTF8-CHAR)
-  (sink-utf8-char char (port->byte-sink port)))
 
 (define (sink-utf8-char char sink)
   (let ((pt (char->integer char)))
@@ -1078,7 +1009,7 @@ USA.
           (sink (subsequent-char 12))
           (sink (subsequent-char 6))
           (sink (subsequent-char 0))))))
-
+\f
 (define (wide-string->utf8-string string #!optional start end)
   (wide-string->utf-string string start end
                           sink-utf8-char
@@ -1205,10 +1136,9 @@ USA.
 ;;;; Wide string ports
 
 (define open-wide-output-string)
-(define call-with-wide-output-string)
 (define open-wide-input-string)
 
-(define (initialize-wide-ports!)
+(define (initialize-package!)
   (set! open-wide-output-string
        (let ((type
               (make-port-type
@@ -1216,7 +1146,6 @@ USA.
                   ,(lambda (port char)
                      (guarantee-wide-char char 'WRITE-CHAR)
                      ((port/state port) char)
-                     ;; Return the number of characters written.
                      1))
                  (EXTRACT-OUTPUT
                   ,(lambda (port)
@@ -1233,8 +1162,6 @@ USA.
                #f)))
          (lambda ()
            (make-port type (open-output-object-buffer)))))
-  (set! call-with-wide-output-string
-       (make-call-with-output-string open-wide-output-string))
   (set! open-wide-input-string
        (let ((type
               (make-port-type
@@ -1250,116 +1177,16 @@ USA.
          (lambda (string #!optional start end)
            (guarantee-wide-string string 'OPEN-WIDE-INPUT-STRING)
            (make-port type
-                      (open-input-object-buffer
-                       (wide-string-contents string)
-                       start
-                       end
-                       'OPEN-WIDE-INPUT-STRING)))))
+                      (open-input-object-buffer (wide-string-contents string)
+                                                start
+                                                end
+                                                'OPEN-WIDE-INPUT-STRING)))))
   unspecific)
-\f
-;;;; UTF-xx string ports
-
-(define open-utf8-input-string)
-(define open-utf8-output-string)
-(define call-with-utf8-output-string)
-(define open-utf16-input-string)
-(define open-utf16-output-string)
-(define call-with-utf16-output-string)
-(define open-utf16-be-input-string)
-(define open-utf16-be-output-string)
-(define call-with-utf16-be-output-string)
-(define open-utf16-le-input-string)
-(define open-utf16-le-output-string)
-(define call-with-utf16-le-output-string)
-(define open-utf32-input-string)
-(define open-utf32-output-string)
-(define call-with-utf32-output-string)
-(define open-utf32-be-input-string)
-(define open-utf32-be-output-string)
-(define call-with-utf32-be-output-string)
-(define open-utf32-le-input-string)
-(define open-utf32-le-output-string)
-(define call-with-utf32-le-output-string)
-
-(define (initialize-utf-ports!)
-  (let-syntax
-      ((define-openers
-        (sc-macro-transformer
-         (lambda (form environment)
-           (if (syntax-match? '(SYMBOL DATUM) (cdr form))
-               (let ((root (cadr form))
-                     (name (caddr form))
-                     (sink
-                      (lambda (root)
-                        (symbol-append 'SINK- root '-CHAR)))
-                     (source
-                      (lambda (root)
-                        (symbol-append 'SOURCE- root '-CHAR))))
-                 (let ((prim
-                        (lambda (sink/source)
-                          (if (memq root '(UTF16 UTF32))
-                              `(IF (HOST-BIG-ENDIAN?)
-                                   ,(sink/source (symbol-append root '-BE))
-                                   ,(sink/source (symbol-append root '-LE)))
-                              (sink/source root))))
-                       (n1 (symbol-append 'OPEN- root '-OUTPUT-STRING))
-                       (n2 (symbol-append 'CALL-WITH- root '-OUTPUT-STRING))
-                       (n3 (symbol-append 'OPEN- root '-INPUT-STRING)))
-                   `(BEGIN
-                      (SET! ,n1
-                            (MAKE-UTF-OUTPUT-OPENER ,name ,(prim sink)))
-                      (SET! ,n2
-                            (MAKE-CALL-WITH-OUTPUT-STRING ,n1))
-                      (SET! ,n3
-                            (MAKE-UTF-INPUT-OPENER ,name ,(prim source))))))
-               (ill-formed-syntax form))))))
-    (define-openers utf8 "UTF-8")
-    (define-openers utf16 "UTF-16")
-    (define-openers utf16-be "UTF-16BE")
-    (define-openers utf16-le "UTF-16LE")
-    (define-openers utf32 "UTF-32")
-    (define-openers utf32-be "UTF-32BE")
-    (define-openers utf32-le "UTF-32LE")
-    unspecific))
-\f
-(define (make-utf-output-opener coding-name sink-char)
-  (let ((type
-        (make-port-type
-         `((WRITE-CHAR
-            ,(lambda (port char)
-               (guarantee-wide-char char 'WRITE-CHAR)
-               (sink-char char (port/state port))
-               1))
-           (EXTRACT-OUTPUT
-            ,(lambda (port)
-               (get-output-bytes (port/state port))))
-           (EXTRACT-OUTPUT!
-            ,(lambda (port)
-               (get-output-bytes! (port/state port))))
-           (WRITE-SELF
-            ,(let ((suffix (string-append " to " coding-name " string")))
-               (lambda (port port*)
-                 port
-                 (write-string suffix port*)))))
-         #f)))
-    (lambda ()
-      (make-port type (open-output-byte-buffer)))))
-
-(define (make-utf-input-opener coding-name source-char)
-  (let ((type
-        (make-port-type
-         `((READ-CHAR
-            ,(lambda (port)
-               (or (source-char (port/state port) 'READ-CHAR)
-                   (make-eof-object port))))
-           (WRITE-SELF
-            ,(let ((suffix (string-append " from " coding-name " string")))
-               (lambda (port output-port)
-                 port
-                 (write-string suffix output-port)))))
-         #f)))
-    (lambda (bytes #!optional start end)
-      (make-port type (open-input-byte-buffer bytes start end #f)))))
+
+(define (call-with-wide-output-string generator)
+  (let ((port (open-wide-output-string)))
+    (generator port)
+    (get-output-string port)))
 
 (define (utf-string->wide-string string start end source-char caller)
   (let ((source (open-input-byte-buffer string start end caller)))
index 34db6fcb4d6f6de6d640fb99effc420e8c8433aa..19e74bbcd81aabfcc8247b0e1a93a0ed2a6df436 100644 (file)
@@ -1,6 +1,6 @@
 #| -*-Scheme-*-
 
-$Id: url.scm,v 1.36 2005/09/08 18:51:30 cph Exp $
+$Id: url.scm,v 1.37 2005/12/13 15:29:58 cph Exp $
 
 Copyright 2000,2001,2003,2004,2005 Massachusetts Institute of Technology
 
@@ -425,9 +425,11 @@ USA.
 ;; works on ISO 8859-1 strings, and we are using UTF-8 strings.
 
 (define (uri-string-downcase string)
-  (call-with-utf8-output-string
+  (call-with-output-string
    (lambda (output)
-     (let ((input (open-utf8-input-string string)))
+     (port/set-coding output 'UTF-8)
+     (let ((input (open-input-string string)))
+       (port/set-coding input 'UTF-8)
        (let loop ()
         (let ((char (read-char input)))
           (if (not (eof-object? char))
index 69025b7839a37b089ed92c78085dc37bc7573686..e11ebd5d06f6694ca17ea2506b59c59a1dd5dd56 100644 (file)
@@ -1,8 +1,8 @@
 #| -*-Scheme-*-
 
-$Id: xml-names.scm,v 1.8 2004/12/23 04:44:18 cph Exp $
+$Id: xml-names.scm,v 1.9 2005/12/13 15:30:28 cph Exp $
 
-Copyright 2003,2004 Massachusetts Institute of Technology
+Copyright 2003,2004,2005 Massachusetts Institute of Technology
 
 This file is part of MIT/GNU Scheme.
 
@@ -242,7 +242,10 @@ USA.
     (if (or (not c)
            (let ((i (fix:+ c 1))
                  (e (string-length s)))
-             (and (let ((char (read-utf8-char (open-input-string s i e))))
+             (and (let ((char
+                         (let ((port (open-input-string s i e)))
+                           (port/set-coding port 'UTF-8)
+                           (read-char port))))
                     (and (not (eof-object? char))
                          (not (char=? char #\:))
                          (char-in-alphabet? char alphabet:name-initial)))
index ef288773ee01b172606a9bf0377d419e07d9efa3..93011c9379108bd5d0b3cb8a2bfb6e92637a302f 100644 (file)
@@ -1,8 +1,8 @@
 #| -*-Scheme-*-
 
-$Id: xml-output.scm,v 1.35 2004/10/15 18:34:20 cph Exp $
+$Id: xml-output.scm,v 1.36 2005/12/13 15:30:33 cph Exp $
 
-Copyright 2001,2002,2003,2004 Massachusetts Institute of Technology
+Copyright 2001,2002,2003,2004,2005 Massachusetts Institute of Technology
 
 This file is part of MIT/GNU Scheme.
 
@@ -492,8 +492,9 @@ USA.
 
 (define (for-each-wide-char string procedure)
   (let ((port (open-input-string string)))
+    (port/set-coding port 'UTF-8)
     (let loop ()
-      (let ((char (read-utf8-char port)))
+      (let ((char (read-char port)))
        (if (not (eof-object? char))
            (begin
              (procedure char)
index 6d7392dce58dc0b4a9a652af79c3c3542b3c5dc1..f54d8faed5d3bfc7ad1f3c93461a6840f46af7c7 100644 (file)
@@ -1,8 +1,8 @@
 #| -*-Scheme-*-
 
-$Id: xml-parser.scm,v 1.64 2004/10/12 23:20:58 cph Exp $
+$Id: xml-parser.scm,v 1.65 2005/12/13 15:30:39 cph Exp $
 
-Copyright 2001,2002,2003,2004 Massachusetts Institute of Technology
+Copyright 2001,2002,2003,2004,2005 Massachusetts Institute of Technology
 
 This file is part of MIT/GNU Scheme.
 
@@ -683,7 +683,8 @@ USA.
                   (perror p "Disallowed Unicode character" char))
               (call-with-output-string
                 (lambda (port)
-                  (write-utf8-char char port))))))))
+                  (port/set-coding port 'UTF-8)
+                  (write-char char port))))))))
     (*parser
      (with-pointer p
        (sbracket "character reference" "&#" ";"
index 9144aff221a9ee96148008c235105d0c436cd6ce..e74cb18a7329649f7eec09d597d4a0b05b6ab47c 100644 (file)
@@ -1,8 +1,8 @@
 #| -*-Scheme-*-
 
-$Id: xml-struct.scm,v 1.47 2004/10/15 18:34:22 cph Exp $
+$Id: xml-struct.scm,v 1.48 2005/12/13 15:30:44 cph Exp $
 
-Copyright 2001,2002,2003,2004 Massachusetts Institute of Technology
+Copyright 2001,2002,2003,2004,2005 Massachusetts Institute of Technology
 
 This file is part of MIT/GNU Scheme.
 
@@ -161,7 +161,8 @@ USA.
   (cond ((wide-char? object)
         (call-with-output-string
           (lambda (port)
-            (write-utf8-char object port))))
+            (port/set-coding port 'UTF-8)
+            (write-char object port))))
        ((wide-string? object)
         (wide-string->utf8-string object))
        ((and (string? object)