Add missing error checking to UTF-8 decoder: was allowing illegal code
authorChris Hanson <org/chris-hanson/cph>
Thu, 27 May 2004 14:03:06 +0000 (14:03 +0000)
committerChris Hanson <org/chris-hanson/cph>
Thu, 27 May 2004 14:03:06 +0000 (14:03 +0000)
points.  Simplify code that checks for illegal code points; some of
the checks were redundant.  Implement object buffering, and use it to
reimplement wide-string format conversions and ports.  Implement input
ports for UTF-xx strings.

v7/src/runtime/unicode.scm

index 0718f22d30b06689266fb63e49b471729de579ec..91c022a5d719a6c0c80a90e7d53fb760dae7bd2c 100644 (file)
@@ -1,6 +1,6 @@
 #| -*-Scheme-*-
 
-$Id: unicode.scm,v 1.17 2004/05/26 17:43:18 cph Exp $
+$Id: unicode.scm,v 1.18 2004/05/27 14:03:06 cph Exp $
 
 Copyright 2001,2003,2004 Massachusetts Institute of Technology
 
@@ -82,13 +82,13 @@ USA.
              (LET* ((,(list-ref form 3)
                      (IF (OR (DEFAULT-OBJECT? ,end) (NOT ,end))
                          (STRING-LENGTH ,string)
-                         (GUARANTEE-SUBSTRING-END-INDEX
-                          ,end (STRING-LENGTH ,string) ,caller)))
+                         (GUARANTEE-LIMITED-INDEX ,end (STRING-LENGTH ,string)
+                                                  ,caller)))
                     (,(list-ref form 2)
                      (IF (OR (DEFAULT-OBJECT? ,start) (NOT ,start))
                          0
-                         (GUARANTEE-SUBSTRING-START-INDEX
-                          ,start ,(list-ref form 3) ,caller))))
+                         (GUARANTEE-LIMITED-INDEX ,start ,(list-ref form 3)
+                                                  ,caller))))
                ,@(map (let ((excludes
                              (list (list-ref form 2) (list-ref form 3))))
                         (lambda (expr)
@@ -138,10 +138,8 @@ USA.
     (get-output-string port)))
 
 (define (initialize-package!)
-  (initialize-output-port!)
-  (initialize-input-port!)
-  (initialize-utf-output-ports!)
-  unspecific)
+  (initialize-wide-ports!)
+  (initialize-utf-ports!))
 \f
 ;;;; Unicode characters
 
@@ -159,9 +157,7 @@ USA.
 
 (define (unicode-code-point? object)
   (and (index-fixnum? object)
-       (if (fix:< object #x10000)
-          (not (illegal-code? object))
-          (fix:< object char-code-limit))))
+       (legal-code-32? object)))
 
 (define-integrable (guarantee-unicode-code-point object caller)
   (if (not (unicode-code-point? object))
@@ -170,7 +166,15 @@ USA.
 (define (error:not-unicode-code-point object caller)
   (error:wrong-type-argument object "a Unicode code point" caller))
 
-(define-integrable (illegal-code? pt)
+(define-integrable (legal-code-32? pt)
+  (if (fix:< pt #x10000)
+      (legal-code-16? pt)
+      (fix:< pt char-code-limit)))
+
+(define-integrable (legal-code-16? pt)
+  (not (illegal-code-16? pt)))
+
+(define-integrable (illegal-code-16? pt)
   (or (fix:= #xD800 (fix:and #xF800 pt))
       (fix:= #xFFFE (fix:and #xFFFE pt))))
 \f
@@ -577,7 +581,7 @@ USA.
 
 (define-integrable (%wide-string-set! string index char)
   (vector-set! (wide-string-contents string) index char))
-\f
+
 (define (wide-substring string start end)
   (guarantee-wide-substring string start end 'WIDE-SUBSTRING)
   (%wide-substring string start end))
@@ -591,7 +595,7 @@ USA.
          ((not (fix:< i end)))
        (vector-set! v2 j (vector-ref v1 i))))
     string*))
-
+\f
 (define-integrable (guarantee-wide-string object caller)
   (if (not (wide-string? object))
       (error:not-wide-string object caller)))
@@ -620,136 +624,45 @@ USA.
 
 (define (guarantee-wide-substring/fail string start end caller)
   (guarantee-wide-string string caller)
-  (guarantee-substring-end-index end (%wide-string-length string) caller)
-  (guarantee-substring-start-index start end caller))
-\f
-(define open-wide-output-string)
-(define call-with-wide-output-string)
-
-(define (initialize-output-port!)
-  (set! open-wide-output-string
-       (let ((type
-              (make-port-type
-               `((WRITE-CHAR
-                  ,(lambda (port char)
-                     (guarantee-wide-char char 'WRITE-CHAR)
-                     (without-interrupts
-                      (lambda ()
-                        (let* ((v (port/state port))
-                               (n (fix:+ (vector-ref v 0) 1)))
-                          (if (fix:< n (vector-length v))
-                              (begin
-                                (vector-set! v n char)
-                                (vector-set! v 0 n))
-                              (let ((v
-                                     (vector-grow
-                                      v
-                                      (fix:- (fix:* (vector-length v) 2)
-                                             1))))
-                                (vector-set! v n char)
-                                (vector-set! v 0 n)
-                                (set-port/state! port v)
-                                v)))))
-                     1))
-                 (EXTRACT-OUTPUT!
-                  ,(lambda (port)
-                     (%make-wide-string
-                      (without-interrupts
-                       (lambda ()
-                         (let ((v (port/state port)))
-                           (subvector v 1 (fix:+ (vector-ref v 0) 1))))))))
-                 (WRITE-SELF
-                  ,(lambda (port port*)
-                     port
-                     (write-string " to wide string" port*))))
-               #f)))
-         (lambda ()
-           (make-port type
-                      (let ((v (make-vector 17)))
-                        (vector-set! v 0 0)
-                        v)))))
-  (set! call-with-wide-output-string
-       (make-call-with-output-string open-wide-output-string))
-  unspecific)
+  (guarantee-limited-index end (%wide-string-length string) caller)
+  (guarantee-limited-index start end caller))
 
 (define (string->wide-string string #!optional start end)
   (guarantee-string string 'STRING->WIDE-STRING)
   (let* ((end
          (if (or (default-object? end) (not end))
              (string-length string)
-             (guarantee-substring-end-index end (string-length string)
-                                            'STRING->WIDE-STRING)))
+             (guarantee-limited-index end (string-length string)
+                                      'STRING->WIDE-STRING)))
         (start
          (if (or (default-object? start) (not start))
              0
-             (guarantee-substring-start-index start end
-                                              'STRING->WIDE-STRING)))
-        (n (fix:- end start))
-        (v (make-vector n)))
+             (guarantee-limited-index start end 'STRING->WIDE-STRING)))
+        (v (make-vector (fix:- end start))))
     (do ((i start (fix:+ i 1))
         (j 0 (fix:+ j 1)))
        ((not (fix:< i end)))
       (vector-set! v j (string-ref string i)))
     (%make-wide-string v)))
-\f
-(define (open-wide-input-string string #!optional start end)
-  (guarantee-wide-string string 'OPEN-WIDE-INPUT-STRING)
-  (let* ((end
+
+(define (wide-string->string string #!optional start end)
+  (guarantee-wide-string string 'WIDE-STRING->STRING)
+  (let* ((v (wide-string-contents string))
+        (end
          (if (or (default-object? end) (not end))
-             (wide-string-length string)
-             (guarantee-substring-end-index end (%wide-string-length string)
-                                            'OPEN-WIDE-INPUT-STRING)))
+             (vector-length v)
+             (guarantee-limited-index end (vector-length v)
+                                      'WIDE-STRING->STRING)))
         (start
          (if (or (default-object? start) (not start))
              0
-             (guarantee-substring-start-index start end
-                                              'OPEN-WIDE-INPUT-STRING))))
-    (make-port ws-input-port-type (make-istate string start end))))
-
-(define ws-input-port-type)
-(define (initialize-input-port!)
-  (set! ws-input-port-type
-       (make-port-type
-        `((CHAR-READY?
-           ,(lambda (port)
-              (let ((s (port/state port)))
-                (fix:< (istate-start s) (istate-end s)))))
-          (READ-CHAR
-           ,(lambda (port)
-              (let ((s (port/state port)))
-                (without-interrupts
-                 (lambda ()
-                   (let ((start (istate-start s)))
-                     (if (fix:< start (istate-end s))
-                         (begin
-                           (set-istate-start! s (fix:+ start 1))
-                           (%wide-string-ref (istate-string s) start))
-                         (make-eof-object port))))))))
-          (WRITE-SELF
-           ,(lambda (port output-port)
-              port
-              (write-string " from wide string" output-port))))
-        #f))
-  unspecific)
-
-(define-structure (istate (type vector))
-  (string #f read-only #t)
-  start
-  (end #f read-only #t))
-
-(define (wide-string->string string #!optional start end)
-  (let ((input
-        (open-wide-input-string string
-                                (if (default-object? start) #f start)
-                                (if (default-object? end) #f end))))
-    (call-with-output-string
-     (lambda (output)
-       (let loop ()
-        (let ((char (read-char input)))
-          (if (not (eof-object? char))
-              (begin
-                (write-char char output)
-                (loop)))))))))
+             (guarantee-limited-index start end 'WIDE-STRING->STRING)))
+        (s (make-string (fix:- end start))))
+    (do ((i start (fix:+ i 1))
+        (j 0 (fix:+ j 1)))
+       ((not (fix:< i end)))
+      (string-set! s j (vector-ref v i)))
+    s))
 \f
 ;;;; UTF-32 representation
 
@@ -781,7 +694,8 @@ USA.
           (if (not (and b1 b2 b3))
               (error "Truncated UTF-32 input."))
           (let ((pt (combiner b0 b1 b2 b3)))
-            (guarantee-unicode-code-point pt caller)
+            (if (not (legal-code-32? pt))
+                (error:not-unicode-code-point pt caller))
             (integer->char pt))))))
 
 (define-integrable (utf32-be-bytes->code-point b0 b1 b2 b3)
@@ -824,69 +738,50 @@ USA.
     (sink 0)))
 \f
 (define (utf32-string->wide-string string #!optional start end)
-  (%utf32-string->wide-string string
-                             (if (default-object? start) #f start)
-                             (if (default-object? end) #f end)
-                             (if (host-big-endian?)
-                                 source-utf32-be-char
-                                 source-utf32-le-char)
-                             'UTF32-STRING->WIDE-STRING))
+  (utf-string->wide-string string
+                          (if (default-object? start) #f start)
+                          (if (default-object? end) #f end)
+                          (if (host-big-endian?)
+                              source-utf32-be-char
+                              source-utf32-le-char)
+                          'UTF32-STRING->WIDE-STRING))
 
 (define (utf32-be-string->wide-string string #!optional start end)
-  (%utf32-string->wide-string string
-                             (if (default-object? start) #f start)
-                             (if (default-object? end) #f end)
-                             source-utf32-be-char
-                             'UTF32-BE-STRING->WIDE-STRING))
+  (utf-string->wide-string string
+                          (if (default-object? start) #f start)
+                          (if (default-object? end) #f end)
+                          source-utf32-be-char
+                          'UTF32-BE-STRING->WIDE-STRING))
 
 (define (utf32-le-string->wide-string string #!optional start end)
-  (%utf32-string->wide-string string
-                             (if (default-object? start) #f start)
-                             (if (default-object? end) #f end)
-                             source-utf32-le-char
-                             'UTF32-LE-STRING->WIDE-STRING))
-
-(define (%utf32-string->wide-string string start end source-utf32-char caller)
-  (let ((source (open-input-byte-buffer string start end)))
-    (call-with-wide-output-string
-     (lambda (output)
-       (let loop ()
-        (let ((char (source-utf32-char source caller)))
-          (if char
-              (begin
-                (write-char char output)
-                (loop)))))))))
+  (utf-string->wide-string string
+                          (if (default-object? start) #f start)
+                          (if (default-object? end) #f end)
+                          source-utf32-le-char
+                          'UTF32-LE-STRING->WIDE-STRING))
 
 (define (wide-string->utf32-string string #!optional start end)
-  (%wide-string->utf32-string string
-                             (if (default-object? start) #f start)
-                             (if (default-object? end) #f end)
-                             (if (host-big-endian?)
-                                 sink-utf32-be-char
-                                 sink-utf32-le-char)))
+  (wide-string->utf-string string
+                          (if (default-object? start) #f start)
+                          (if (default-object? end) #f end)
+                          (if (host-big-endian?)
+                              sink-utf32-be-char
+                              sink-utf32-le-char)
+                          'WIDE-STRING->UTF32-STRING))
 
 (define (wide-string->utf32-be-string string #!optional start end)
-  (%wide-string->utf32-string string
-                             (if (default-object? start) #f start)
-                             (if (default-object? end) #f end)
-                             sink-utf32-be-char))
+  (wide-string->utf-string string
+                          (if (default-object? start) #f start)
+                          (if (default-object? end) #f end)
+                          sink-utf32-be-char
+                          'WIDE-STRING->UTF32-BE-STRING))
 
 (define (wide-string->utf32-le-string string #!optional start end)
-  (%wide-string->utf32-string string
-                             (if (default-object? start) #f start)
-                             (if (default-object? end) #f end)
-                             sink-utf32-le-char))
-
-(define (%wide-string->utf32-string string start end sink-utf32-char)
-  (let ((input (open-wide-input-string string start end)))
-    (call-with-output-byte-buffer
-     (lambda (sink)
-       (let loop ()
-        (let ((char (read-char input)))
-          (if (not (eof-object? char))
-              (begin
-                (sink-utf32-char char sink)
-                (loop)))))))))
+  (wide-string->utf-string string
+                          (if (default-object? start) #f start)
+                          (if (default-object? end) #f end)
+                          sink-utf32-le-char
+                          'WIDE-STRING->UTF32-LE-STRING))
 \f
 (define (utf32-string-length string #!optional start end)
   (if (host-big-endian?)
@@ -960,11 +855,9 @@ USA.
     (vector-8b-ref string (fix:+ start i)))
 
   (if (fix:< start end)
-      (let ((start* (fix:+ start 4)))
-       (and (fix:<= start* end)
-            (let ((pt (combiner (n 0) (n 1) (n 2) (n 3))))
-              (and (unicode-code-point? pt)
-                   start*))))
+      (and (fix:<= (fix:+ start 4) end)
+          (legal-code-32? (combiner (n 0) (n 1) (n 2) (n 3)))
+          (fix:+ start 4))
       start))
 \f
 ;;;; UTF-16 representation
@@ -991,17 +884,18 @@ USA.
 (define-integrable (source-utf16-char source combinator caller)
   (let ((d0 (source-utf16-digit source combinator)))
     (and d0
-        (let ((pt
-               (if (high-surrogate? d0)
-                   (let ((d1 (source-utf16-digit source combinator)))
-                     (if (not d1)
-                         (error "Truncated UTF-16 input."))
-                     (if (not (low-surrogate? d1))
-                         (error "Illegal UTF-16 subsequent digit:" d1))
-                     (combine-surrogates d0 d1))
-                   d0)))
-          (guarantee-unicode-code-point pt caller)
-          (integer->char pt)))))
+        (integer->char
+         (if (high-surrogate? d0)
+             (let ((d1 (source-utf16-digit source combinator)))
+               (if (not d1)
+                   (error "Truncated UTF-16 input."))
+               (if (not (low-surrogate? d1))
+                   (error "Illegal UTF-16 subsequent digit:" d1))
+               (combine-surrogates d0 d1))
+             (begin
+               (if (illegal-code-16? d0)
+                   (error:not-unicode-code-point d0 caller))
+               d0))))))
 
 (define-integrable (source-utf16-digit source combinator)
   (let ((b0 (source)))
@@ -1045,69 +939,50 @@ USA.
          (dissecter (fix:or #xDC00 (fix:and s #x3FF)) sink)))))
 \f
 (define (utf16-string->wide-string string #!optional start end)
-  (%utf16-string->wide-string string
-                             (if (default-object? start) #f start)
-                             (if (default-object? end) #f end)
-                             (if (host-big-endian?)
-                                 source-utf16-be-char
-                                 source-utf16-le-char)
-                             'UTF16-STRING->WIDE-STRING))
+  (utf-string->wide-string string
+                          (if (default-object? start) #f start)
+                          (if (default-object? end) #f end)
+                          (if (host-big-endian?)
+                              source-utf16-be-char
+                              source-utf16-le-char)
+                          'UTF16-STRING->WIDE-STRING))
 
 (define (utf16-be-string->wide-string string #!optional start end)
-  (%utf16-string->wide-string string
-                             (if (default-object? start) #f start)
-                             (if (default-object? end) #f end)
-                             source-utf16-be-char
-                             'UTF16-BE-STRING->WIDE-STRING))
+  (utf-string->wide-string string
+                          (if (default-object? start) #f start)
+                          (if (default-object? end) #f end)
+                          source-utf16-be-char
+                          'UTF16-BE-STRING->WIDE-STRING))
 
 (define (utf16-le-string->wide-string string #!optional start end)
-  (%utf16-string->wide-string string
-                             (if (default-object? start) #f start)
-                             (if (default-object? end) #f end)
-                             source-utf16-le-char
-                             'UTF16-LE-STRING->WIDE-STRING))
-
-(define (%utf16-string->wide-string string start end source-utf16-char caller)
-  (let ((source (open-input-byte-buffer string start end)))
-    (call-with-wide-output-string
-     (lambda (output)
-       (let loop ()
-        (let ((char (source-utf16-char source caller)))
-          (if char
-              (begin
-                (write-char char output)
-                (loop)))))))))
+  (utf-string->wide-string string
+                          (if (default-object? start) #f start)
+                          (if (default-object? end) #f end)
+                          source-utf16-le-char
+                          'UTF16-LE-STRING->WIDE-STRING))
 
 (define (wide-string->utf16-string string #!optional start end)
-  (%wide-string->utf16-string string
-                             (if (default-object? start) #f start)
-                             (if (default-object? end) #f end)
-                             (if (host-big-endian?)
-                                 sink-utf16-be-char
-                                 sink-utf16-le-char)))
+  (wide-string->utf-string string
+                          (if (default-object? start) #f start)
+                          (if (default-object? end) #f end)
+                          (if (host-big-endian?)
+                              sink-utf16-be-char
+                              sink-utf16-le-char)
+                          'WIDE-STRING->UTF16-STRING))
 
 (define (wide-string->utf16-be-string string #!optional start end)
-  (%wide-string->utf16-string string
-                             (if (default-object? start) #f start)
-                             (if (default-object? end) #f end)
-                             sink-utf16-be-char))
+  (wide-string->utf-string string
+                          (if (default-object? start) #f start)
+                          (if (default-object? end) #f end)
+                          sink-utf16-be-char
+                          'WIDE-STRING->UTF16-BE-STRING))
 
 (define (wide-string->utf16-le-string string #!optional start end)
-  (%wide-string->utf16-string string
-                             (if (default-object? start) #f start)
-                             (if (default-object? end) #f end)
-                             sink-utf16-le-char))
-
-(define (%wide-string->utf16-string string start end sink-utf16-char)
-  (let ((input (open-wide-input-string string start end)))
-    (call-with-output-byte-buffer
-     (lambda (sink)
-       (let loop ()
-        (let ((char (read-char input)))
-          (if (not (eof-object? char))
-              (begin
-                (sink-utf16-char char sink)
-                (loop)))))))))
+  (wide-string->utf-string string
+                          (if (default-object? start) #f start)
+                          (if (default-object? end) #f end)
+                          sink-utf16-le-char
+                          'WIDE-STRING->UTF16-LE-STRING))
 \f
 (define (utf16-string-length string #!optional start end)
   (if (host-big-endian?)
@@ -1185,12 +1060,9 @@ USA.
           (let ((d0 (combiner (n 0) (n 1))))
             (if (high-surrogate? d0)
                 (and (fix:<= (fix:+ start 4) end)
-                     (let ((d1 (combiner (n 2) (n 3))))
-                       (and (low-surrogate? d1)
-                            (let ((pt (combine-surrogates d0 d1)))
-                              (and (unicode-code-point? pt)
-                                   (fix:+ start 4))))))
-                (and (unicode-code-point? d0)
+                     (low-surrogate? (combiner (n 2) (n 3)))
+                     (fix:+ start 4))
+                (and (legal-code-16? d0)
                      (fix:+ start 2)))))
       start))
 
@@ -1214,10 +1086,10 @@ USA.
 ;;;; UTF-8 representation
 
 (define (read-utf8-char port)
-  (or (source-utf8-char (port->byte-source port))
+  (or (source-utf8-char (port->byte-source port) 'READ-UTF8-CHAR)
       (make-eof-object port)))
 
-(define (source-utf8-char source)
+(define (source-utf8-char source caller)
   (let ((b0 (source))
        (get-next
         (lambda ()
@@ -1237,7 +1109,10 @@ USA.
                ((fix:< b0 #xF0)
                 (let ((b1 (get-next)))
                   (%vc3 b0 b1)
-                  (%cp3 b0 b1 (get-next))))
+                  (let ((pt (%cp3 b0 b1 (get-next))))
+                    (if (illegal-code-16? pt)
+                        (error:not-unicode-code-point pt caller))
+                    pt)))
                ((fix:< b0 #xF8)
                 (let ((b1 (get-next)))
                   (%vc4 b0 b1)
@@ -1247,18 +1122,11 @@ USA.
                 (error "Illegal UTF-8 byte:" b0)))))))
 
 (define (utf8-string->wide-string string #!optional start end)
-  (let ((source
-        (open-input-byte-buffer string
-                                (if (default-object? start) #f start)
-                                (if (default-object? end) #f end))))
-    (call-with-wide-output-string
-     (lambda (output)
-       (let loop ()
-        (let ((char (source-utf8-char source)))
-          (if char
-              (begin
-                (write-char char output)
-                (loop)))))))))
+  (utf-string->wide-string string
+                          (if (default-object? start) #f start)
+                          (if (default-object? end) #f end)
+                          source-utf8-char
+                          'UTF8-STRING->WIDE-STRING))
 \f
 (define (write-utf8-char char port)
   (guarantee-wide-char char 'WRITE-UTF8-CHAR)
@@ -1290,18 +1158,11 @@ USA.
           (sink (subsequent-char 0))))))
 
 (define (wide-string->utf8-string string #!optional start end)
-  (let ((input
-        (open-wide-input-string string
-                                (if (default-object? start) #f start)
-                                (if (default-object? end) #f end))))
-    (call-with-output-byte-buffer
-     (lambda (sink)
-       (let loop ()
-        (let ((char (read-char input)))
-          (if (not (eof-object? char))
-              (begin
-                (sink-utf8-char char sink)
-                (loop)))))))))
+  (wide-string->utf-string string
+                          (if (default-object? start) #f start)
+                          (if (default-object? end) #f end)
+                          sink-utf8-char
+                          'WIDE-STRING->UTF8-STRING))
 
 (define (utf8-string-length string #!optional start end)
   (with-substring-args string start end 'UTF8-STRING-LENGTH
@@ -1334,6 +1195,7 @@ USA.
                    (check-byte 1)
                    (check-byte 2)
                    (%vs3 b0 (n 1))
+                   (legal-code-16? (%cp3 b0 (n 1) (n 2)))
                    (fix:+ start 3)))
              ((fix:< b0 #xF8)
               (and (fix:<= (fix:+ start 4) end)
@@ -1384,75 +1246,184 @@ USA.
 (define-integrable (%valid-trailer? n)
   (fix:= #x80 (fix:and #xC0 n)))
 \f
+;;;; Wide string ports
+
+(define open-wide-output-string)
+(define call-with-wide-output-string)
+(define open-wide-input-string)
+
+(define (initialize-wide-ports!)
+  (set! open-wide-output-string
+       (let ((type
+              (make-port-type
+               `((WRITE-CHAR
+                  ,(lambda (port char)
+                     (guarantee-wide-char char 'WRITE-CHAR)
+                     ((port/state port) char)))
+                 (EXTRACT-OUTPUT!
+                  ,(lambda (port)
+                     (%make-wide-string
+                      (get-output-objects (port/state port)))))
+                 (WRITE-SELF
+                  ,(lambda (port port*)
+                     port
+                     (write-string " to wide string" port*))))
+               #f)))
+         (lambda ()
+           (make-port type (open-output-object-buffer)))))
+  (set! call-with-wide-output-string
+       (make-call-with-output-string open-wide-output-string))
+  (set! open-wide-input-string
+       (let ((type
+              (make-port-type
+               `((READ-CHAR
+                  ,(lambda (port)
+                     (or ((port/state port))
+                         (make-eof-object port))))
+                 (WRITE-SELF
+                  ,(lambda (port output-port)
+                     port
+                     (write-string " from wide string" output-port))))
+               #f)))
+         (lambda (string #!optional start end)
+           (guarantee-wide-string string 'OPEN-WIDE-INPUT-STRING)
+           (make-port type
+                      (open-input-object-buffer
+                       (wide-string-contents string)
+                       (if (default-object? start) #f start)
+                       (if (default-object? end) #f end)
+                       'OPEN-WIDE-INPUT-STRING)))))
+  unspecific)
+\f
+;;;; UTF-xx string ports
+
+(define open-utf8-input-string)
 (define open-utf8-output-string)
 (define call-with-utf8-output-string)
+(define open-utf16-input-string)
 (define open-utf16-output-string)
 (define call-with-utf16-output-string)
+(define open-utf16-be-input-string)
 (define open-utf16-be-output-string)
 (define call-with-utf16-be-output-string)
+(define open-utf16-le-input-string)
 (define open-utf16-le-output-string)
 (define call-with-utf16-le-output-string)
+(define open-utf32-input-string)
 (define open-utf32-output-string)
 (define call-with-utf32-output-string)
+(define open-utf32-be-input-string)
 (define open-utf32-be-output-string)
 (define call-with-utf32-be-output-string)
+(define open-utf32-le-input-string)
 (define open-utf32-le-output-string)
 (define call-with-utf32-le-output-string)
 
-(define (initialize-utf-output-ports!)
-  (let ((make-opener
-        (lambda (sink-char coding-name)
-          (let ((type
-                 (make-port-type
-                  `((WRITE-CHAR
-                     ,(lambda (port char)
-                        (guarantee-wide-char char 'WRITE-CHAR)
-                        (sink-char char (port/state port))
-                        1))
-                    (EXTRACT-OUTPUT!
-                     ,(lambda (port)
-                        (get-output-bytes (port/state port))))
-                    (WRITE-SELF
-                     ,(let ((description
-                             (string-append " to " coding-name " string")))
-                        (lambda (port port*)
-                          port
-                          (write-string description port*)))))
-                  #f)))
-            (lambda ()
-              (make-port type (open-output-byte-buffer)))))))
-    (let-syntax
-       ((define-openers
-          (sc-macro-transformer
-           (lambda (form environment)
-             (if (syntax-match? '(SYMBOL DATUM expression) (cdr form))
-                 (let ((n0 (symbol-append (cadr form) '-OUTPUT-STRING)))
-                   (let ((n1 (symbol-append 'OPEN- n0))
-                         (n2 (symbol-append 'CALL-WITH- n0)))
-                     `(BEGIN
-                        (SET! ,n1
-                              (MAKE-OPENER ,(cadddr form) ,(caddr form)))
-                        (SET! ,n2
-                              (MAKE-CALL-WITH-OUTPUT-STRING ,n1)))))
-                 (ill-formed-syntax form))))))
-
-      (define-openers utf8 "UTF-8" sink-utf8-char)
-
-      (define-openers utf16 "UTF-16"
-       (if (host-big-endian?)
-           sink-utf16-be-char
-           sink-utf16-le-char))
-      (define-openers utf16-be "UTF-16BE" sink-utf16-be-char)
-      (define-openers utf16-le "UTF-16LE" sink-utf16-le-char)
-
-      (define-openers utf32 "UTF-32"
-       (if (host-big-endian?)
-           sink-utf32-be-char
-           sink-utf32-le-char))
-      (define-openers utf32-be "UTF-32BE" sink-utf32-be-char)
-      (define-openers utf32-le "UTF-32LE" sink-utf32-le-char)
-
-      unspecific)))
+(define (initialize-utf-ports!)
+  (let-syntax
+      ((define-openers
+        (sc-macro-transformer
+         (lambda (form environment)
+           (if (syntax-match? '(SYMBOL DATUM) (cdr form))
+               (let ((root (cadr form))
+                     (name (caddr form))
+                     (sink
+                      (lambda (root)
+                        (symbol-append 'SINK- root '-CHAR)))
+                     (source
+                      (lambda (root)
+                        (symbol-append 'SOURCE- root '-CHAR))))
+                 (let ((prim
+                        (lambda (sink/source)
+                          (if (memq root '(UTF16 UTF32))
+                              `(IF (HOST-BIG-ENDIAN?)
+                                   ,(sink/source (symbol-append root '-BE))
+                                   ,(sink/source (symbol-append root '-LE)))
+                              (sink/source root))))
+                       (n1 (symbol-append 'OPEN- root '-OUTPUT-STRING))
+                       (n2 (symbol-append 'CALL-WITH- root '-OUTPUT-STRING))
+                       (n3 (symbol-append 'OPEN- root '-INPUT-STRING)))
+                   `(BEGIN
+                      (SET! ,n1
+                            (MAKE-UTF-OUTPUT-OPENER ,name ,(prim sink)))
+                      (SET! ,n2
+                            (MAKE-CALL-WITH-OUTPUT-STRING ,n1))
+                      (SET! ,n3
+                            (MAKE-UTF-INPUT-OPENER ,name ,(prim source))))))
+               (ill-formed-syntax form))))))
+    (define-openers utf8 "UTF-8")
+    (define-openers utf16 "UTF-16")
+    (define-openers utf16-be "UTF-16BE")
+    (define-openers utf16-le "UTF-16LE")
+    (define-openers utf32 "UTF-32")
+    (define-openers utf32-be "UTF-32BE")
+    (define-openers utf32-le "UTF-32LE")
+    unspecific))
+\f
+(define (make-utf-output-opener coding-name sink-char)
+  (let ((type
+        (make-port-type
+         `((WRITE-CHAR
+            ,(lambda (port char)
+               (guarantee-wide-char char 'WRITE-CHAR)
+               (sink-char char (port/state port))
+               1))
+           (EXTRACT-OUTPUT!
+            ,(lambda (port)
+               (get-output-bytes (port/state port))))
+           (WRITE-SELF
+            ,(let ((suffix (string-append " to " coding-name " string")))
+               (lambda (port port*)
+                 port
+                 (write-string suffix port*)))))
+         #f)))
+    (lambda ()
+      (make-port type (open-output-byte-buffer)))))
+
+(define (make-utf-input-opener coding-name source-char)
+  (let ((type
+        (make-port-type
+         `((READ-CHAR
+            ,(lambda (port)
+               (or (source-char (port/state port) 'READ-CHAR)
+                   (make-eof-object port))))
+           (WRITE-SELF
+            ,(let ((suffix (string-append " from " coding-name " string")))
+               (lambda (port output-port)
+                 port
+                 (write-string suffix output-port)))))
+         #f)))
+    (lambda (bytes #!optional start end)
+      (make-port type
+                (open-input-byte-buffer string
+                                        (if (default-object? start) #f start)
+                                        (if (default-object? end) #f end)
+                                        #f)))))
+
+(define (utf-string->wide-string string start end source-char caller)
+  (let ((source (open-input-byte-buffer string start end caller)))
+    (%make-wide-string
+     (call-with-output-object-buffer
+      (lambda (sink)
+       (let loop ()
+         (let ((char (source-char source caller)))
+           (if char
+               (begin
+                 (sink char)
+                 (loop))))))))))
+
+(define (wide-string->utf-string string start end sink-char caller)
+  (let ((source
+        (open-input-object-buffer (wide-string-contents string) start end
+                                  caller)))
+    (call-with-output-byte-buffer
+     (lambda (sink)
+       (let loop ()
+        (let ((char (source)))
+          (if char
+              (begin
+                (sink-char char sink)
+                (loop)))))))))
 \f
 ;;;; Byte buffers
 
@@ -1463,22 +1434,25 @@ USA.
       (if (eq? byte 'EXTRACT-OUTPUT!)
          (without-interrupts
           (lambda ()
-            (set-string-maximum-length! bytes index)
-            (let ((bytes* bytes))
-              (set! bytes #f)
-              bytes*)))
-         (begin
-           (cond ((not bytes)
-                  (set! bytes (make-string 128))
-                  (set! index 0))
-                 ((not (fix:< index (string-length bytes)))
-                  (let ((n (fix:* (string-length bytes) 2)))
-                    (let ((bytes* (make-string n)))
-                      (string-move! bytes bytes* 0)
-                      (set! bytes bytes*)))))
-           (vector-8b-set! bytes index byte)
-           (set! index (fix:+ index 1))
-           unspecific)))))
+            (if bytes
+                (let ((bytes* bytes))
+                  (set! bytes #f)
+                  (set-string-maximum-length! bytes* index)
+                  bytes*)
+                (make-string 0))))
+         (without-interrupts
+          (lambda ()
+            (cond ((not bytes)
+                   (set! bytes (make-string 128))
+                   (set! index 0))
+                  ((not (fix:< index (string-length bytes)))
+                   (let ((bytes*
+                          (make-string (fix:* (string-length bytes) 2))))
+                     (string-move! bytes bytes* 0)
+                     (set! bytes bytes*))))
+            (vector-8b-set! bytes index byte)
+            (set! index (fix:+ index 1))
+            unspecific))))))
 
 (define (get-output-bytes buffer)
   (buffer 'EXTRACT-OUTPUT!))
@@ -1488,11 +1462,82 @@ USA.
     (generator buffer)
     (get-output-bytes buffer)))
 
-(define (open-input-byte-buffer bytes start end)
-  (let ((index (or start 0))
-       (end (or end (string-length bytes))))
+(define (open-input-byte-buffer bytes start end caller)
+  (let* ((end
+         (if (not end)
+             (string-length bytes)
+             (guarantee-limited-index end (string-length bytes) caller)))
+        (index
+         (if (not start)
+             0
+             (guarantee-limited-index start end caller))))
     (lambda ()
-      (and (fix:< index end)
-          (let ((byte (vector-8b-ref bytes index)))
+      (without-interrupts
+       (lambda ()
+        (and (fix:< index end)
+             (let ((byte (vector-8b-ref bytes index)))
+               (set! index (fix:+ index 1))
+               byte)))))))
+\f
+;;;; Object buffers
+
+(define (open-output-object-buffer)
+  (let ((objects #f)
+       (index))
+    (lambda (object)
+      (if (eq? object extract-output-tag)
+         (without-interrupts
+          (lambda ()
+            (if objects
+                (let ((objects* objects))
+                  (set! objects #f)
+                  (if (fix:< index (vector-length objects*))
+                      (vector-head objects* index)
+                      objects*))
+                (make-vector 0))))
+         (without-interrupts
+          (lambda ()
+            (cond ((not objects)
+                   (set! objects (make-vector 128))
+                   (set! index 0))
+                  ((not (fix:< index (vector-length objects)))
+                   (set! objects
+                         (vector-grow objects
+                                      (fix:* (vector-length objects) 2)))))
+            (vector-set! objects index object)
             (set! index (fix:+ index 1))
-            byte)))))
\ No newline at end of file
+            unspecific))))))
+
+(define (get-output-objects buffer)
+  (buffer extract-output-tag))
+
+(define extract-output-tag
+  (list 'EXTRACT-OUTPUT!))
+
+(define (call-with-output-object-buffer generator)
+  (let ((buffer (open-output-object-buffer)))
+    (generator buffer)
+    (get-output-objects buffer)))
+
+(define (open-input-object-buffer objects start end caller)
+  (let* ((end
+         (if (not end)
+             (vector-length objects)
+             (guarantee-limited-index end (vector-length objects) caller)))
+        (index
+         (if (not start)
+             0
+             (guarantee-limited-index start end caller))))
+    (lambda ()
+      (without-interrupts
+       (lambda ()
+        (and (fix:< index end)
+             (let ((object (vector-ref objects index)))
+               (set! index (fix:+ index 1))
+               object)))))))
+
+(define (guarantee-limited-index index limit caller)
+  (guarantee-index-fixnum index caller)
+  (if (not (fix:<= index limit))
+      (error:bad-range-argument index caller))
+  index)
\ No newline at end of file