From 62f2f5914563a99b96907b99517670384334b477 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Sat, 19 Jul 2008 01:41:18 +0000 Subject: [PATCH] Rewrite I/O string implementation to eliminate coding on strings -- input from a string is just the characters in the string. The old string I/O is renamed to refer to byte vectors, and a collection of convenience procedurs that use byte-vector I/O to do UTF-xx coding. Additionally, rewrite unicode support to use these I/O ports to do format conversions. --- v7/src/runtime/ed-ffi.scm | 6 +- v7/src/runtime/make.scm | 8 +- v7/src/runtime/output.scm | 44 +- v7/src/runtime/runtime.pkg | 108 +++-- v7/src/runtime/stringio.scm | 646 +++++++++++++++++++++++++++++ v7/src/runtime/strnin.scm | 85 ---- v7/src/runtime/strott.scm | 134 ------ v7/src/runtime/strout.scm | 140 ------- v7/src/runtime/symbol.scm | 8 +- v7/src/runtime/unicode.scm | 801 +++++++++++------------------------- v7/src/runtime/url.scm | 8 +- v7/src/ssp/mod-lisp.scm | 4 +- v7/src/ssp/xmlrpc.scm | 4 +- v7/src/xml/rdf-nt.scm | 8 +- v7/src/xml/turtle.scm | 14 +- v7/src/xml/xml-output.scm | 7 +- v7/src/xml/xml-parser.scm | 9 +- v7/src/xml/xml-struct.scm | 7 +- v7/src/xml/xpath.scm | 4 +- 19 files changed, 1041 insertions(+), 1004 deletions(-) create mode 100644 v7/src/runtime/stringio.scm delete mode 100644 v7/src/runtime/strnin.scm delete mode 100644 v7/src/runtime/strott.scm delete mode 100644 v7/src/runtime/strout.scm diff --git a/v7/src/runtime/ed-ffi.scm b/v7/src/runtime/ed-ffi.scm index 20b3be5a6..c8459f74c 100644 --- a/v7/src/runtime/ed-ffi.scm +++ b/v7/src/runtime/ed-ffi.scm @@ -1,6 +1,6 @@ #| -*- Scheme -*- -$Id: ed-ffi.scm,v 1.40 2008/01/30 20:02:30 cph Exp $ +$Id: ed-ffi.scm,v 1.41 2008/07/19 01:41:16 cph Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -141,9 +141,7 @@ USA. ("starbase" (runtime starbase-graphics)) ("stream" (runtime stream)) ("string" (runtime string)) - ("strnin" (runtime string-input)) - ("strott" (runtime truncated-string-output)) - ("strout" (runtime string-output)) + ("stringio" (runtime string-i/o-port)) ("symbol" (runtime symbol)) ("syncproc" (runtime synchronous-subprocess)) ("syntactic-closures" (runtime syntactic-closures)) diff --git a/v7/src/runtime/make.scm b/v7/src/runtime/make.scm index ddc009b76..3e0ae166e 100644 --- a/v7/src/runtime/make.scm +++ b/v7/src/runtime/make.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: make.scm,v 14.115 2008/02/10 06:14:11 cph Exp $ +$Id: make.scm,v 14.116 2008/07/19 01:41:16 cph Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -461,14 +461,13 @@ USA. ;; Threads (RUNTIME THREAD) ;; I/O + (RUNTIME OUTPUT-PORT) (RUNTIME GENERIC-I/O-PORT) (RUNTIME FILE-I/O-PORT) (RUNTIME CONSOLE-I/O-PORT) (RUNTIME SOCKET) (RUNTIME TRANSCRIPT) - (RUNTIME STRING-INPUT) - (RUNTIME STRING-OUTPUT) - (RUNTIME TRUNCATED-STRING-OUTPUT) + (RUNTIME STRING-I/O-PORT) (RUNTIME USER-INTERFACE) ;; These MUST be done before (RUNTIME PATHNAME) ;; Typically only one of them is loaded. @@ -477,7 +476,6 @@ USA. (RUNTIME PATHNAME) (RUNTIME WORKING-DIRECTORY) (RUNTIME LOAD) - (RUNTIME UNICODE) (RUNTIME SIMPLE-FILE-OPS) ((RUNTIME OS-PRIMITIVES) INITIALIZE-MIME-TYPES! #f) ;; Syntax diff --git a/v7/src/runtime/output.scm b/v7/src/runtime/output.scm index 0c1224cd1..e85d91877 100644 --- a/v7/src/runtime/output.scm +++ b/v7/src/runtime/output.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: output.scm,v 14.40 2008/01/30 20:02:33 cph Exp $ +$Id: output.scm,v 14.41 2008/07/19 01:41:16 cph Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -307,4 +307,44 @@ USA. (write-char #\space port) (write-spaces (- n 1))))) - (if row-major? (do-row-major) (do-col-major)))) \ No newline at end of file + (if row-major? (do-row-major) (do-col-major)))) + +;;;; Output truncation + +(define (call-with-truncated-output-port limit port generator) + (call-with-current-continuation + (lambda (k) + (let ((port (make-port truncated-output-type + (make-tstate limit port k 0)))) + (generator port) + #f)))) + +(define-structure tstate + (port #f read-only #t) + (limit #f read-only #t) + (continuation #f read-only #t) + count) + +(define (trunc-out/write-char port char) + (let ((ts (port/state port))) + (if (< (tstate-count ts) (tstate-limit ts)) + (begin + (set-tstate-count! ts (+ (tstate-count ts) 1)) + (output-port/write-char (tstate-port ts) char)) + ((tstate-continuation ts) #t)))) + +(define (trunc-out/flush-output port) + (output-port/flush-output (tstate-port (port/state port)))) + +(define (trunc-out/discretionary-flush-output port) + (output-port/discretionary-flush (tstate-port (port/state port)))) + +(define truncated-output-type) +(define (initialize-package!) + (set! truncated-output-type + (make-port-type `((WRITE-CHAR ,trunc-out/write-char) + (FLUSH-OUTPUT ,trunc-out/flush-output) + (DISCRETIONARY-FLUSH-OUTPUT + ,trunc-out/discretionary-flush-output)) + #f)) + unspecific) \ No newline at end of file diff --git a/v7/src/runtime/runtime.pkg b/v7/src/runtime/runtime.pkg index bb238a235..ce9f33ccd 100644 --- a/v7/src/runtime/runtime.pkg +++ b/v7/src/runtime/runtime.pkg @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: runtime.pkg,v 14.648 2008/07/11 05:26:42 cph Exp $ +$Id: runtime.pkg,v 14.649 2008/07/19 01:41:16 cph Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -2081,6 +2081,7 @@ USA. (parent (runtime)) (export () beep + call-with-truncated-output-port clear display flush-output @@ -2107,7 +2108,8 @@ USA. write-line write-string write-strings-in-columns - write-substring)) + write-substring) + (initialization (initialize-package!))) (define-package (runtime interrupt-handler) (files "intrpt") @@ -4101,28 +4103,33 @@ USA. the-empty-stream) (initialization (initialize-package!))) -(define-package (runtime string-input) - (files "strnin") +(define-package (runtime string-i/o-port) + (files "stringio") (parent (runtime)) (export () - call-with-input-string - open-input-string - (string->input-port open-input-string) - with-input-from-string) - (initialization (initialize-package!))) - -(define-package (runtime string-output) - (files "strout") - (parent (runtime)) - (export () - call-with-output-string + (call-with-output-string call-with-narrow-output-string) (get-output-from-accumulator get-output-string!) + (make-accumulator-output-port open-narrow-output-string) + (open-output-string open-narrow-output-string) + (open-wide-input-string open-input-string) + (string->input-port open-input-string) + (with-string-output-port call-with-narrow-output-string) + call-with-input-bytes + call-with-input-string + call-with-narrow-output-string + call-with-output-bytes + call-with-truncated-output-string + call-with-wide-output-string get-output-string get-output-string! - (make-accumulator-output-port open-output-string) - open-output-string - (with-string-output-port call-with-output-string) - with-output-to-string) + open-input-bytes + open-input-string + open-narrow-output-string + open-output-bytes + open-wide-output-string + with-input-from-string + with-output-to-string + with-output-to-truncated-string) (initialization (initialize-package!))) (define-package (runtime syntactic-closures) @@ -4221,14 +4228,6 @@ USA. increment-non-runtime!) (initialization (initialize-package!))) -(define-package (runtime truncated-string-output) - (files "strott") - (parent (runtime)) - (export () - call-with-truncated-output-string - with-output-to-truncated-string) - (initialization (initialize-package!))) - (define-package (runtime unparser) (files "unpars") (parent (runtime)) @@ -4853,6 +4852,13 @@ USA. (files "unicode") (parent (runtime)) (export () + (wide-string->utf16-be-string string->utf16-be-string) + (wide-string->utf16-le-string string->utf16-le-string) + (wide-string->utf16-string string->utf16-string) + (wide-string->utf32-be-string string->utf32-be-string) + (wide-string->utf32-le-string string->utf32-le-string) + (wide-string->utf32-string string->utf32-string) + (wide-string->utf8-string string->utf8-string) 8-bit-alphabet? @@ -4864,7 +4870,20 @@ USA. alphabet->string alphabet-predicate alphabet? - call-with-wide-output-string + call-with-utf16-be-input-string + call-with-utf16-be-output-string + call-with-utf16-input-string + call-with-utf16-le-input-string + call-with-utf16-le-output-string + call-with-utf16-output-string + call-with-utf32-be-input-string + call-with-utf32-be-output-string + call-with-utf32-input-string + call-with-utf32-le-input-string + call-with-utf32-le-output-string + call-with-utf32-output-string + call-with-utf8-input-string + call-with-utf8-output-string char-in-alphabet? char-set->alphabet code-points->alphabet @@ -4899,9 +4918,28 @@ USA. guarantee-wide-string-index guarantee-wide-substring make-wide-string - open-wide-input-string - open-wide-output-string + open-utf16-be-input-string + open-utf16-be-output-string + open-utf16-input-string + open-utf16-le-input-string + open-utf16-le-output-string + open-utf16-output-string + open-utf32-be-input-string + open-utf32-be-output-string + open-utf32-input-string + open-utf32-le-input-string + open-utf32-le-output-string + open-utf32-output-string + open-utf8-input-string + open-utf8-output-string string->alphabet + string->utf16-be-string + string->utf16-le-string + string->utf16-string + string->utf32-be-string + string->utf32-le-string + string->utf32-string + string->utf8-string string->utf8-string string->wide-string unicode-code-point? @@ -4938,13 +4976,6 @@ USA. wide-char? wide-string wide-string->string - wide-string->utf16-be-string - wide-string->utf16-le-string - wide-string->utf16-string - wide-string->utf32-be-string - wide-string->utf32-le-string - wide-string->utf32-string - wide-string->utf8-string wide-string-index? wide-string-length wide-string-ref @@ -4959,8 +4990,7 @@ USA. (export (runtime generic-i/o-port) wide-string-contents) (export (runtime input-port) - wide-string-contents) - (initialization (initialize-package!))) + wide-string-contents)) (define-package (runtime uri) (files "url") diff --git a/v7/src/runtime/stringio.scm b/v7/src/runtime/stringio.scm new file mode 100644 index 000000000..32fe89f2a --- /dev/null +++ b/v7/src/runtime/stringio.scm @@ -0,0 +1,646 @@ +#| -*-Scheme-*- + +$Id: stringio.scm,v 14.1 2008/07/19 01:41:16 cph Exp $ + +Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, + 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, + 2006, 2007, 2008 Massachusetts Institute of Technology + +This file is part of MIT/GNU Scheme. + +MIT/GNU Scheme is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at +your option) any later version. + +MIT/GNU Scheme is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with MIT/GNU Scheme; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, +USA. + +|# + +;;;; String I/O Ports (SRFI-6) +;;; package: (runtime string-i/o-port) + +(declare (usual-integrations)) + +;;;; Input as characters + +(define (with-input-from-string string thunk) + (with-input-from-port (open-input-string string) thunk)) + +(define (call-with-input-string string procedure) + (let ((port (open-input-string string))) + (let ((value (procedure port))) + (close-input-port port) + value))) + +(define (open-input-string string #!optional start end) + (cond ((string? string) + (receive (start end) + (check-index-limits start end (string-length string) + 'OPEN-INPUT-STRING) + (make-port narrow-input-type + (make-internal-input-state string start end)))) + ((wide-string? string) + (receive (start end) + (check-index-limits start end (wide-string-length string) + 'OPEN-INPUT-STRING) + (make-port wide-input-type + (make-internal-input-state string start end)))) + ((external-string? string) + (receive (start end) + (check-index-limits start end (xstring-length string) + 'OPEN-INPUT-STRING) + (make-port external-input-type + (make-external-input-state string start end)))) + (else + (error:not-string string 'OPEN-INPUT-STRING)))) + +(define (check-index-limits start end limit caller) + (let ((end + (if (or (default-object? end) (not end)) + limit + (begin + (guarantee-exact-nonnegative-integer end caller) + (if (not (<= end limit)) + (error:bad-range-argument end caller)) + end)))) + (values (if (or (default-object? start) (not start)) + 0 + (begin + (guarantee-exact-nonnegative-integer start caller) + (if (not (<= start end)) + (error:bad-range-argument start caller)) + start)) + end))) + +(define (make-string-in-type peek-char read-char unread-char) + (make-port-type `((CHAR-READY? ,string-in/char-ready?) + (EOF? ,internal-in/eof?) + (PEEK-CHAR ,peek-char) + (READ-CHAR ,read-char) + (READ-EXTERNAL-SUBSTRING ,internal-in/read-substring) + (READ-SUBSTRING ,internal-in/read-substring) + (READ-WIDE-SUBSTRING ,internal-in/read-substring) + (UNREAD-CHAR ,unread-char) + (WRITE-SELF ,string-in/write-self)) + #f)) + +(define (make-internal-input-state string start end) + (make-iistate string start end start)) + +(define-structure iistate + (string #f read-only #t) + (start #f read-only #t) + (end #f read-only #t) + next) + +(define (string-in/char-ready? port) + port + #t) + +(define (string-in/write-self port output-port) + port + (write-string " from string" output-port)) + +(define (internal-in/eof? port) + (let ((ss (port/state port))) + (not (fix:< (iistate-next ss) (iistate-end ss))))) + +(define (internal-in/read-substring port string start end) + (let ((ss (port/state port))) + (move-chars! (iistate-string ss) (iistate-next ss) (iistate-end ss) + string start end))) + +(define (make-narrow-input-type) + (make-string-in-type narrow-in/peek-char + narrow-in/read-char + narrow-in/unread-char)) + +(define (narrow-in/peek-char port) + (let ((ss (port/state port))) + (if (fix:< (iistate-next ss) (iistate-end ss)) + (string-ref (iistate-string ss) (iistate-next ss)) + (make-eof-object port)))) + +(define (narrow-in/read-char port) + (let ((ss (port/state port))) + (if (fix:< (iistate-next ss) (iistate-end ss)) + (let ((char (string-ref (iistate-string ss) (iistate-next ss)))) + (set-iistate-next! ss (fix:+ (iistate-next ss) 1)) + char) + (make-eof-object port)))) + +(define (narrow-in/unread-char port char) + (let ((ss (port/state port))) + (if (not (fix:< (iistate-start ss) (iistate-next ss))) + (error "No char to unread:" port)) + (let ((prev (fix:- (iistate-next ss) 1))) + (if (not (char=? char (string-ref (iistate-string ss) prev))) + (error "Unread char incorrect:" char)) + (set-iistate-next! ss prev)))) + +(define (make-wide-input-type) + (make-string-in-type wide-in/peek-char + wide-in/read-char + wide-in/unread-char)) + +(define (wide-in/peek-char port) + (let ((ss (port/state port))) + (if (fix:< (iistate-next ss) (iistate-end ss)) + (wide-string-ref (iistate-string ss) (iistate-next ss)) + (make-eof-object port)))) + +(define (wide-in/read-char port) + (let ((ss (port/state port))) + (if (fix:< (iistate-next ss) (iistate-end ss)) + (let ((char (wide-string-ref (iistate-string ss) (iistate-next ss)))) + (set-iistate-next! ss (fix:+ (iistate-next ss) 1)) + char) + (make-eof-object port)))) + +(define (wide-in/unread-char port char) + (let ((ss (port/state port))) + (if (not (fix:< (iistate-start ss) (iistate-next ss))) + (error "No char to unread:" port)) + (let ((prev (fix:- (iistate-next ss) 1))) + (if (not (char=? char (wide-string-ref (iistate-string ss) prev))) + (error "Unread char incorrect:" char)) + (set-iistate-next! ss prev)))) + +(define (make-external-input-type) + (make-port-type + `((CHAR-READY? ,string-in/char-ready?) + (EOF? ,external-in/eof?) + (PEEK-CHAR ,external-in/peek-char) + (READ-CHAR ,external-in/read-char) + (READ-EXTERNAL-SUBSTRING ,external-in/read-substring) + (READ-SUBSTRING ,external-in/read-substring) + (READ-WIDE-SUBSTRING ,external-in/read-substring) + (UNREAD-CHAR ,external-in/unread-char) + (WRITE-SELF ,string-in/write-self)) + #f)) + +(define (make-external-input-state string start end) + (make-xistate (external-string-source string start end) #f #f)) + +(define-structure xistate + (source #f read-only #t) + unread) + +(define (external-in/eof? port) + (let ((xs (port/state port))) + (and (not (xistate-unread xs)) + (not ((xistate-source xs)))))) + +(define (external-in/peek-char port) + (let ((xs (port/state port))) + (or (xistate-unread xs) + (let ((char ((xistate-source xs)))) + (set-xistate-unread! xs char) + char)))) + +(define (external-in/read-char port) + (let ((xs (port/state port))) + (let ((unread (xistate-unread xs))) + (if unread + (begin + (set-xistate-unread! xs #f) + unread) + ((xistate-source xs)))))) + +(define (external-in/unread-char port char) + (let ((xs (port/state port))) + (if (xistate-unread xs) + (error "Can't unread two chars.")) + (set-xistate-unread! xs char))) + +(define (external-in/read-substring port string start end) + (source->sink! (xistate-source (port/state port)) + (string-sink string start end))) + +(define (move-chars! string start end string* start* end*) + (let ((n (min (- end start) (- end* start*)))) + (let ((end (+ start n)) + (end* (+ start* n))) + (cond ((wide-string? string) + (source->sink! (wide-string-source string start end) + (string-sink string* start* end*))) + ((wide-string? string*) + (source->sink! (string-source string start end) + (wide-string-sink string* start* end*))) + (else + (xsubstring-move! string start end string* start*)))) + n)) + +(define (source->sink! source sink) + (let loop ((n 0)) + (if (sink (source)) + (loop (+ n 1)) + n))) + +(define (string-source string start end) + (cond ((string? string) (narrow-string-source string start end)) + ((wide-string? string) (wide-string-source string start end)) + ((external-string? string) (external-string-source string start end)) + (else (error:not-string string #f)))) + +(define (string-sink string start end) + (cond ((string? string) (narrow-string-sink string start end)) + ((wide-string? string) (wide-string-sink string start end)) + ((external-string? string) (external-string-sink string start end)) + (else (error:not-string string #f)))) + +(define (narrow-string-source string start end) + (lambda () + (and (fix:< start end) + (let ((char (string-ref string start))) + (set! start (fix:+ start 1)) + char)))) + +(define (narrow-string-sink string start end) + (lambda (char) + (and char + (begin + (if (not (fix:< (char->integer char) #x100)) + (error:not-8-bit-char char)) + (and (fix:< start end) + (begin + (string-set! string start char) + (set! start (+ start 1)) + #t)))))) + +(define (wide-string-source string start end) + (lambda () + (and (fix:< start end) + (let ((char (wide-string-ref string start))) + (set! start (fix:+ start 1)) + char)))) + +(define (wide-string-sink string start end) + (lambda (char) + (and char + (fix:< start end) + (begin + (wide-string-set! string start char) + (set! start (+ start 1)) + #t)))) + +(define (external-string-source string start end) + (let ((buffer (make-string #x1000)) + (bi #x1000) + (next start)) + (lambda () + (and (< next end) + (begin + (if (fix:>= bi #x1000) + (begin + (xsubstring-move! string next (min (+ next #x1000) end) + buffer 0) + (set! bi 0))) + (let ((char (string-ref buffer bi))) + (set! bi (fix:+ bi 1)) + (set! next (+ next 1)) + char)))))) + +(define (external-string-sink string start end) + (let ((buffer (make-string #x1000)) + (bi 0)) + (lambda (char) + (if char + (begin + (if (not (fix:< (char->integer char) #x100)) + (error:not-8-bit-char char)) + (and (< start end) + (begin + (string-set! buffer bi char) + (set! bi (fix:+ bi 1)) + (set! start (+ start 1)) + (if (fix:= bi #x1000) + (begin + (xsubstring-move! buffer 0 bi string (- start bi)) + (set! bi 0))) + #t))) + (begin + (xsubstring-move! buffer 0 bi string (- start bi)) + (set! bi 0) + #f))))) + +;;;; Input as byte vector + +(define (call-with-input-bytes bytes procedure) + (let ((port (open-input-bytes bytes))) + (let ((value (procedure port))) + (close-input-port port) + value))) + +(define (open-input-bytes bytes #!optional start end) + (guarantee-xstring bytes 'OPEN-INPUT-BYTES) + (receive (start end) + (check-index-limits start end (xstring-length bytes) 'OPEN-INPUT-BYTES) + (let ((port + (make-generic-i/o-port (make-bytes-source bytes start end) + #f + bytes-input-type))) + (port/set-coding port 'ISO-8859-1) + (port/set-line-ending port 'NEWLINE) + port))) + +(define (make-bytes-source string start end) + (let ((index start)) + (make-non-channel-port-source + (lambda () + (< index end)) + (lambda (string* start* end*) + (let ((n (min (- end index) (- end* start*)))) + (let ((limit (+ index n))) + (xsubstring-move! string index limit string* start*) + (set! index limit)) + n))))) + +(define (make-bytes-input-type) + (make-port-type `((WRITE-SELF + ,(lambda (port output-port) + port + (write-string " from byte vector" output-port)))) + (generic-i/o-port-type #t #f))) + +;;;; Output as characters + +(define (open-narrow-output-string) + (make-port narrow-output-type (make-ostate (make-string 16) 0 0))) + +(define (open-wide-output-string) + (make-port wide-output-type (make-ostate (make-wide-string 16) 0 0))) + +(define (get-output-string port) + ((port/operation port 'EXTRACT-OUTPUT) port)) + +(define (get-output-string! port) + ((port/operation port 'EXTRACT-OUTPUT!) port)) + +(define (call-with-narrow-output-string generator) + (let ((port (open-narrow-output-string))) + (generator port) + (get-output-string port))) + +(define (call-with-wide-output-string generator) + (let ((port (open-wide-output-string))) + (generator port) + (get-output-string port))) + +(define (call-with-truncated-output-string limit generator) + (call-with-narrow-output-string + (lambda (port) + (call-with-truncated-output-port limit port generator)))) + +(define (with-output-to-string thunk) + (call-with-narrow-output-string + (lambda (port) + (with-output-to-port port thunk)))) + +(define (with-output-to-truncated-string limit thunk) + (call-with-truncated-output-string limit + (lambda (port) + (with-output-to-port port thunk)))) + +(define (make-narrow-output-type) + (make-string-out-type narrow-out/write-char + narrow-out/extract-output + narrow-out/extract-output!)) + +(define (narrow-out/write-char port char) + (if (not (fix:< (char->integer char) #x100)) + (error:not-8-bit-char char)) + (let ((os (port/state port))) + (maybe-grow-buffer os 1) + (string-set! (ostate-buffer os) (ostate-index os) char) + (set-ostate-index! os (fix:+ (ostate-index os) 1)) + (set-ostate-column! os (new-column char (ostate-column os))) + 1)) + +(define (narrow-out/extract-output port) + (let ((os (port/state port))) + (string-head (ostate-buffer os) (ostate-index os)))) + +(define (narrow-out/extract-output! port) + (let ((os (port/state port))) + (let ((string (ostate-buffer os))) + (set-string-maximum-length! string (ostate-index os)) + (reset-buffer! os) + string))) + +(define (make-wide-output-type) + (make-string-out-type wide-out/write-char + wide-out/extract-output + wide-out/extract-output!)) + +(define (wide-out/write-char port char) + (let ((os (port/state port))) + (maybe-grow-buffer os 1) + (wide-string-set! (ostate-buffer os) (ostate-index os) char) + (set-ostate-index! os (fix:+ (ostate-index os) 1)) + (set-ostate-column! os (new-column char (ostate-column os))) + 1)) + +(define (wide-out/extract-output port) + (let ((os (port/state port))) + (wide-substring (ostate-buffer os) 0 (ostate-index os)))) + +(define (wide-out/extract-output! port) + (let ((os (port/state port))) + (let ((output (wide-substring (ostate-buffer os) 0 (ostate-index os)))) + (reset-buffer! os) + output))) + +(define (make-string-out-type write-char extract-output extract-output!) + (make-port-type `((WRITE-CHAR ,write-char) + (WRITE-EXTERNAL-SUBSTRING ,string-out/write-substring) + (WRITE-SUBSTRING ,string-out/write-substring) + (WRITE-WIDE-SUBSTRING ,string-out/write-substring) + (EXTRACT-OUTPUT ,extract-output) + (EXTRACT-OUTPUT! ,extract-output!) + (OUTPUT-COLUMN ,string-out/output-column) + (WRITE-SELF ,string-out/write-self)) + #f)) + +(define-structure ostate + buffer + index + column) + +(define (string-out/output-column port) + (ostate-column (port/state port))) + +(define (string-out/write-self port output-port) + port + (write-string " to string" output-port)) + +(define (string-out/write-substring port string start end) + (let ((os (port/state port)) + (n (- end start))) + (maybe-grow-buffer os n) + (let* ((start* (ostate-index os)) + (end* (+ start* n))) + (move-chars! string start end (ostate-buffer os) start* end*) + (set-ostate-index! os end*)) + (update-column-for-substring! os n) + n)) + +(define (maybe-grow-buffer os n) + (let ((buffer (ostate-buffer os)) + (n (+ (ostate-index os) n))) + (let ((m + (if (wide-string? buffer) + (wide-string-length buffer) + (string-length buffer)))) + (if (< m n) + (let ((buffer* + (let ((m* + (let loop ((m (+ m m))) + (if (< m n) + (loop (+ m m)) + m)))) + (if (wide-string? buffer) + (make-wide-string m*) + (make-string m*))))) + (move-chars! buffer 0 (ostate-index os) + buffer* 0 (ostate-index os)) + (set-ostate-buffer! os buffer*)))))) + +(define (reset-buffer! os) + (set-ostate-buffer! os + (if (wide-string? (ostate-buffer os)) + (make-wide-string 16) + (make-string 16))) + (set-ostate-index! os 0) + (set-ostate-column! os 0)) + +(define (new-column char column) + (case char + ((#\newline) 0) + ((#\tab) (fix:+ column (fix:- 8 (fix:remainder column 8)))) + (else (fix:+ column 1)))) + +(define (update-column-for-substring! os n) + (let ((string (ostate-buffer os)) + (end (ostate-index os))) + (let ((start (- (ostate-index os) n))) + (letrec + ((loop + (lambda (i column) + (if (< i end) + (loop (+ i 1) + (new-column (if (wide-string? string) + (wide-string-ref string i) + (string-ref string i)) + column)) + (set-ostate-column! os column))))) + (let ((nl (find-newline string start end))) + (if nl + (loop (+ nl 1) 0) + (loop start (ostate-column os)))))))) + +(define (find-newline string start end) + (if (wide-string? string) + (let loop ((index end)) + (and (fix:> index start) + (let ((index (fix:- index 1))) + (if (char=? (wide-string-ref string index) #\newline) + index + (loop index))))) + (xsubstring-find-previous-char string start end #\newline))) + +;;;; Output as bytes + +(define (call-with-output-bytes generator) + (let ((port (open-output-bytes))) + (generator port) + (get-output-string port))) + +(define (open-output-bytes) + (let ((port + (let ((os (make-ostate (make-vector-8b 16) 0 #f))) + (make-generic-i/o-port #f + (make-byte-sink os) + bytes-output-type + os)))) + (port/set-line-ending port 'NEWLINE) + port)) + +(define (make-byte-sink os) + (make-non-channel-port-sink + (lambda (bytes start end) + (let ((index (ostate-index os))) + (let ((n (fix:+ index (fix:- end start)))) + (let ((buffer (ostate-buffer os))) + (if (fix:> n (vector-8b-length buffer)) + (set-ostate-buffer! + os + (let ((new + (make-vector-8b + (let loop ((m (vector-8b-length buffer))) + (if (fix:>= m n) + m + (loop (fix:+ m m))))))) + (substring-move! buffer 0 index new 0) + new)))) + (substring-move! bytes start end (ostate-buffer os) index) + (set-ostate-index! os n) + (fix:- end start)))))) + +(define (make-bytes-output-type) + (make-port-type `((EXTRACT-OUTPUT ,bytes-out/extract-output) + (EXTRACT-OUTPUT! ,bytes-out/extract-output!) + (POSITION ,bytes-out/position) + (WRITE-SELF ,bytes-out/write-self)) + (generic-i/o-port-type #f #t))) + +(define (bytes-out/extract-output port) + (output-port/flush-output port) + (let ((os (output-bytes-port/os port))) + (string-head (ostate-buffer os) (ostate-index os)))) + +(define (bytes-out/extract-output! port) + (output-port/flush-output port) + (let ((os (output-bytes-port/os port))) + (let ((bytes (ostate-buffer os))) + (set-string-maximum-length! bytes (ostate-index os)) + (set-ostate-buffer! os (make-vector-8b 16)) + (set-ostate-index! os 0) + bytes))) + +(define (bytes-out/position port) + (output-port/flush-output port) + (ostate-index (output-bytes-port/os port))) + +(define (bytes-out/write-self port output-port) + port + (write-string " to byte vector" output-port)) + +(define narrow-input-type) +(define wide-input-type) +(define external-input-type) +(define bytes-input-type) +(define narrow-output-type) +(define wide-output-type) +(define bytes-output-type) +(define output-bytes-port/os) + +(define (initialize-package!) + (set! narrow-input-type (make-narrow-input-type)) + (set! wide-input-type (make-wide-input-type)) + (set! external-input-type (make-external-input-type)) + (set! bytes-input-type (make-bytes-input-type)) + (set! narrow-output-type (make-narrow-output-type)) + (set! wide-output-type (make-wide-output-type)) + (set! bytes-output-type (make-bytes-output-type)) + (set! output-bytes-port/os (generic-i/o-port-accessor 0)) + unspecific) \ No newline at end of file diff --git a/v7/src/runtime/strnin.scm b/v7/src/runtime/strnin.scm deleted file mode 100644 index 053708a8f..000000000 --- a/v7/src/runtime/strnin.scm +++ /dev/null @@ -1,85 +0,0 @@ -#| -*-Scheme-*- - -$Id: strnin.scm,v 14.23 2008/02/02 02:02:51 cph Exp $ - -Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, - 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, - 2006, 2007, 2008 Massachusetts Institute of Technology - -This file is part of MIT/GNU Scheme. - -MIT/GNU Scheme is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or (at -your option) any later version. - -MIT/GNU Scheme is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with MIT/GNU Scheme; if not, write to the Free Software -Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, -USA. - -|# - -;;;; String Input Ports (SRFI-6) -;;; package: (runtime string-input) - -(declare (usual-integrations)) - -(define (with-input-from-string string thunk) - (with-input-from-port (open-input-string string) thunk)) - -(define (open-input-string string #!optional start end) - (guarantee-string string 'OPEN-INPUT-STRING) - (let ((port - (let* ((end - (if (or (default-object? end) (not end)) - (string-length string) - (guarantee-substring-end-index end (string-length string) - 'OPEN-INPUT-STRING))) - (start - (if (or (default-object? start) (not start)) - 0 - (guarantee-substring-start-index start end - 'OPEN-INPUT-STRING)))) - (make-generic-i/o-port (make-string-source string start end) - #f - input-string-port-type)))) - (port/set-coding port 'ISO-8859-1) - (port/set-line-ending port 'NEWLINE) - port)) - -(define (call-with-input-string string procedure) - (let ((port (open-input-string string))) - (let ((value (procedure port))) - (close-input-port port) - value))) - -(define (make-string-source string start end) - (let ((index start)) - (make-non-channel-port-source - (lambda () - (fix:< index end)) - (lambda (string* start* end*) - (let ((n - (fix:min (fix:- end index) - (fix:- end* start*)))) - (let ((limit (fix:+ index n))) - (substring-move! string index limit string* start*) - (set! index limit)) - n))))) - -(define input-string-port-type) -(define (initialize-package!) - (set! input-string-port-type - (make-port-type - `((WRITE-SELF - ,(lambda (port output-port) - port - (write-string " from string" output-port)))) - (generic-i/o-port-type #t #f))) - unspecific) \ No newline at end of file diff --git a/v7/src/runtime/strott.scm b/v7/src/runtime/strott.scm deleted file mode 100644 index cb902907c..000000000 --- a/v7/src/runtime/strott.scm +++ /dev/null @@ -1,134 +0,0 @@ -#| -*-Scheme-*- - -$Id: strott.scm,v 14.21 2008/02/02 04:28:47 cph Exp $ - -Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, - 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, - 2006, 2007, 2008 Massachusetts Institute of Technology - -This file is part of MIT/GNU Scheme. - -MIT/GNU Scheme is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or (at -your option) any later version. - -MIT/GNU Scheme is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with MIT/GNU Scheme; if not, write to the Free Software -Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, -USA. - -|# - -;;;; String output ports (truncated) -;;; package: (runtime truncated-string-output) - -(declare (usual-integrations)) - -(define (call-with-truncated-output-string limit generator) - (call-with-current-continuation - (lambda (k) - (let ((port - (receive (sink extract extract!) (make-accumulator-sink limit k) - (make-generic-i/o-port #f - sink - output-string-port-type - extract - extract!)))) - (port/set-coding port 'ISO-8859-1) - (port/set-line-ending port 'NEWLINE) - (generator port) - (cons #f (get-output-string port)))))) - -(define (with-output-to-truncated-string max thunk) - (call-with-truncated-output-string max - (lambda (port) - (with-output-to-port port thunk)))) - -(define port/extract) -(define port/extract!) -(define output-string-port-type) - -(define (initialize-package!) - (set! port/extract (generic-i/o-port-accessor 0)) - (set! port/extract! (generic-i/o-port-accessor 1)) - (set! output-string-port-type - (make-port-type - `((EXTRACT-OUTPUT - ,(lambda (port) - (output-port/flush-output port) - ((port/extract port)))) - (EXTRACT-OUTPUT! - ,(lambda (port) - (output-port/flush-output port) - ((port/extract! port)))) - (WRITE-SELF - ,(lambda (port output-port) - port - (write-string " to string (truncating)" output-port)))) - (generic-i/o-port-type #f #t))) - unspecific) - -(define (make-accumulator-sink limit k) - (let ((chars #f) - (index 0)) - - (define (normal-case string start end n) - (cond ((not chars) - (set! chars (new-chars 128 n))) - ((fix:> n (string-length chars)) - (let ((new (new-chars (string-length chars) n))) - (substring-move! chars 0 index new 0) - (set! chars new)))) - (substring-move! string start end chars index) - (set! index n) - (fix:- end start)) - - (define (new-chars start min-length) - (make-string - (let loop ((n start)) - (cond ((fix:>= n limit) limit) - ((fix:>= n min-length) n) - (else (loop (fix:+ n n))))))) - - (define (limit-case string start) - (let ((s - (cond ((not chars) (make-string limit)) - ((fix:> limit (string-length chars)) - (let ((s (make-string limit))) - (substring-move! chars 0 index s 0) - s)) - (else chars)))) - (substring-move! string start (fix:+ start (fix:- limit index)) - s index) - (set! chars #f) - (set! index 0) - (k (cons #t s)))) - - (values (make-non-channel-port-sink - (lambda (string start end) - (without-interrupts - (lambda () - (let ((n (fix:+ index (fix:- end start)))) - (if (fix:<= n limit) - (normal-case string start end n) - (limit-case string start))))))) - (lambda () - (if chars - (string-head chars index) - (make-string 0))) - (lambda () - (without-interrupts - (lambda () - (if chars - (let ((s chars)) - (set! chars #f) - (set! index 0) - (set-string-maximum-length! s index) - s) - (make-string 0)))))))) \ No newline at end of file diff --git a/v7/src/runtime/strout.scm b/v7/src/runtime/strout.scm deleted file mode 100644 index d23f8a697..000000000 --- a/v7/src/runtime/strout.scm +++ /dev/null @@ -1,140 +0,0 @@ -#| -*-Scheme-*- - -$Id: strout.scm,v 14.32 2008/02/02 04:28:48 cph Exp $ - -Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, - 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, - 2006, 2007, 2008 Massachusetts Institute of Technology - -This file is part of MIT/GNU Scheme. - -MIT/GNU Scheme is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or (at -your option) any later version. - -MIT/GNU Scheme is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with MIT/GNU Scheme; if not, write to the Free Software -Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, -USA. - -|# - -;;;; String Output Ports (SRFI-6) -;;; package: (runtime string-output) - -(declare (usual-integrations)) - -(define (open-output-string) - (let ((port - (receive (sink extract extract! position) (make-accumulator-sink) - (make-generic-i/o-port #f - sink - accumulator-output-port-type - extract - extract! - position)))) - (port/set-coding port 'ISO-8859-1) - (port/set-line-ending port 'NEWLINE) - port)) - -(define (get-output-string port) - ((port/operation port 'EXTRACT-OUTPUT) port)) - -(define (get-output-string! port) - ((port/operation port 'EXTRACT-OUTPUT!) port)) - -(define (call-with-output-string generator) - (let ((port (open-output-string))) - (generator port) - (get-output-string port))) - -(define (with-output-to-string thunk) - (call-with-output-string - (lambda (port) - (with-output-to-port port thunk)))) - -(define port/extract) -(define port/extract!) -(define port/position) -(define accumulator-output-port-type) - -(define (initialize-package!) - (set! port/extract (generic-i/o-port-accessor 0)) - (set! port/extract! (generic-i/o-port-accessor 1)) - (set! port/position (generic-i/o-port-accessor 2)) - (set! accumulator-output-port-type - (make-port-type - `((EXTRACT-OUTPUT - ,(lambda (port) - (output-port/flush-output port) - ((port/extract port)))) - (EXTRACT-OUTPUT! - ,(lambda (port) - (output-port/flush-output port) - ((port/extract! port)))) - (POSITION - ,(lambda (port) - (output-port/flush-output port) - ((port/position port)))) - (WRITE-SELF - ,(lambda (port output-port) - port - (write-string " to string" output-port)))) - (generic-i/o-port-type #f #t))) - unspecific) - -(define (make-accumulator-sink) - (let ((chars #f) - (index 0)) - - (define (write-substring string start end) - (let ((n (fix:+ index (fix:- end start)))) - (cond ((not chars) - (set! chars (new-chars 128 n))) - ((fix:> n (string-length chars)) - (set! chars - (let ((new (new-chars (string-length chars) n))) - (substring-move! chars 0 index new 0) - new)))) - (substring-move! string start end chars index) - (set! index n) - (fix:- end start))) - - (define (new-chars start min-length) - (make-string - (let loop ((n start)) - (if (fix:>= n min-length) - n - (loop (fix:+ n n)))))) - - (values (make-non-channel-port-sink - (lambda (string start end) - (without-interrupts - (lambda () - (write-substring string start end))))) - (lambda () - (without-interrupts - (lambda () - (if chars - (string-head chars index) - (make-string 0))))) - (lambda () - (without-interrupts - (lambda () - (if chars - (let ((s chars)) - (set-string-maximum-length! s index) - (set! chars #f) - (set! index 0) - s) - (make-string 0))))) - (lambda () - (without-interrupts - (lambda () - index)))))) \ No newline at end of file diff --git a/v7/src/runtime/symbol.scm b/v7/src/runtime/symbol.scm index 8093cb3d6..b816e782e 100644 --- a/v7/src/runtime/symbol.scm +++ b/v7/src/runtime/symbol.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: symbol.scm,v 1.25 2008/01/30 20:02:35 cph Exp $ +$Id: symbol.scm,v 1.26 2008/07/19 01:41:17 cph Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -101,12 +101,10 @@ USA. (if (ascii-string? string) ;; Needed during cold load. (string-downcase string) - (call-with-input-string string + (call-with-utf8-input-string string (lambda (input) - (port/set-coding input 'utf-8) - (call-with-output-string + (call-with-utf8-output-string (lambda (output) - (port/set-coding output 'utf-8) (let loop () (let ((c (read-char input))) (if (not (eof-object? c)) diff --git a/v7/src/runtime/unicode.scm b/v7/src/runtime/unicode.scm index 9488b80e6..e1c0620b8 100644 --- a/v7/src/runtime/unicode.scm +++ b/v7/src/runtime/unicode.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: unicode.scm,v 1.37 2008/07/11 05:26:43 cph Exp $ +$Id: unicode.scm,v 1.38 2008/07/19 01:41:17 cph Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -95,6 +95,12 @@ USA. (list-tail form 5))))) (ill-formed-syntax form))))) +(define (guarantee-limited-index index limit caller) + (guarantee-index-fixnum index caller) + (if (not (fix:<= index limit)) + (error:bad-range-argument index caller)) + index) + (define (encoded-string-length string start end type caller validate-char) (let loop ((start start) (n 0)) (if (fix:< start end) @@ -116,20 +122,45 @@ USA. (loop start*) #f)) #t))) - -(define (port->byte-source port) + +(define (coded-input-opener coding) + (lambda (string #!optional start end) + (let ((port (open-input-bytes string start end))) + (port/set-coding port coding) + (port/set-line-ending port 'NEWLINE) + port))) + +(define (coded-output-opener coding) + (lambda () + (let ((port (open-output-bytes))) + (port/set-coding port coding) + (port/set-line-ending port 'NEWLINE) + port))) + +(define (ended-input-opener be le) + (lambda (string #!optional start end) + (if (host-big-endian?) + (be string start end) + (le string start end)))) + +(define (ended-output-opener be le) (lambda () - (let ((char (read-char port))) - (if (eof-object? char) - #f - (let ((b (char->integer char))) - (if (not (fix:< b #x100)) - (error "Illegal input byte:" b)) - b))))) - -(define (port->byte-sink port) - (lambda (byte) - (write-char (integer->char byte) port))) + (if (host-big-endian?) + (be) + (le)))) + +(define (input-string-caller open-input) + (lambda (string procedure) + (let ((port (open-input string))) + (let ((value (procedure port))) + (close-input-port port) + value)))) + +(define (output-string-caller open-output) + (lambda (procedure) + (let ((port (open-output))) + (procedure port) + (get-output-string! port)))) ;;;; Unicode characters @@ -623,127 +654,100 @@ Not used at the moment. (guarantee-limited-index start end caller)) (define (string->wide-string string #!optional start end) - (guarantee-string string 'STRING->WIDE-STRING) - (let* ((end - (if (if (default-object? end) #f end) - (guarantee-limited-index end (string-length string) - 'STRING->WIDE-STRING) - (string-length string))) - (start - (if (if (default-object? start) #f start) - (guarantee-limited-index start end 'STRING->WIDE-STRING) - 0)) - (v (make-vector (fix:- end start)))) - (do ((i start (fix:+ i 1)) - (j 0 (fix:+ j 1))) - ((not (fix:< i end))) - (vector-set! v j (string-ref string i))) - (%make-wide-string v))) + (%convert-string string start end + open-input-string + open-wide-output-string)) (define (wide-string->string string #!optional start end) - (guarantee-wide-string string 'WIDE-STRING->STRING) - (let* ((v (wide-string-contents string)) - (end - (if (if (default-object? end) #f end) - (guarantee-limited-index end (vector-length v) - 'WIDE-STRING->STRING) - (vector-length v))) - (start - (if (if (default-object? start) #f start) - (guarantee-limited-index start end 'WIDE-STRING->STRING) - 0)) - (s (make-string (fix:- end start)))) - (do ((i start (fix:+ i 1)) - (j 0 (fix:+ j 1))) - ((not (fix:< i end))) - (if (fix:< (char->integer (vector-ref v i)) #x100) - (string-set! s j (vector-ref v i)) - (error:bad-range-argument string 'WIDE-STRING->STRING))) - s)) + (%convert-string string start end + open-input-string + open-narrow-output-string)) + +(define (%convert-string string start end open-input open-output) + (let ((input (open-input string start end)) + (output (open-output))) + (let loop () + (let ((c (read-char input))) + (if (not (eof-object? c)) + (begin + (write-char c output) + (loop))))) + (get-output-string! output))) ;;;; UTF-32 representation -(define (source-utf32-be-char source caller) - (source-utf32-char source utf32-be-bytes->code-point caller)) - -(define (source-utf32-le-char source caller) - (source-utf32-char source utf32-le-bytes->code-point caller)) - -(define-integrable (source-utf32-char source combiner caller) - (let ((b0 (source))) - (and b0 - (let* ((b1 (source)) - (b2 (source)) - (b3 (source))) - (if (not (and b1 b2 b3)) - (error "Truncated UTF-32 input.")) - (let ((pt (combiner b0 b1 b2 b3))) - (if (not (legal-code-32? pt)) - (error:not-unicode-code-point pt caller)) - (integer->char pt)))))) +(define open-utf32-be-input-string + (coded-input-opener 'UTF-32BE)) -(define-integrable (utf32-be-bytes->code-point b0 b1 b2 b3) - (+ (* b0 #x01000000) - (fix:lsh b1 16) - (fix:lsh b2 8) - b3)) +(define open-utf32-le-input-string + (coded-input-opener 'UTF-32LE)) -(define-integrable (utf32-le-bytes->code-point b0 b1 b2 b3) - (+ (* b3 #x01000000) - (fix:lsh b2 16) - (fix:lsh b1 8) - b0)) +(define open-utf32-input-string + (ended-input-opener open-utf32-be-input-string + open-utf32-le-input-string)) + +(define call-with-utf32-be-input-string + (input-string-caller open-utf32-be-input-string)) + +(define call-with-utf32-le-input-string + (input-string-caller open-utf32-le-input-string)) + +(define call-with-utf32-input-string + (input-string-caller open-utf32-input-string)) + +(define open-utf32-be-output-string + (coded-output-opener 'UTF-32BE)) -(define-integrable (sink-utf32-be-char char sink) - (let ((pt (char->integer char))) - (sink 0) - (sink (fix:lsh pt -16)) - (sink (fix:lsh pt -8)) - (sink (fix:and pt #xFF)))) +(define open-utf32-le-output-string + (coded-output-opener 'UTF-32LE)) -(define-integrable (sink-utf32-le-char char sink) - (let ((pt (char->integer char))) - (sink (fix:and pt #xFF)) - (sink (fix:lsh pt -8)) - (sink (fix:lsh pt -16)) - (sink 0))) +(define open-utf32-output-string + (ended-output-opener open-utf32-be-output-string + open-utf32-le-output-string)) + +(define call-with-utf32-be-output-string + (output-string-caller open-utf32-be-output-string)) + +(define call-with-utf32-le-output-string + (output-string-caller open-utf32-le-output-string)) + +(define call-with-utf32-output-string + (output-string-caller open-utf32-output-string)) (define (utf32-string->wide-string string #!optional start end) - (utf-string->wide-string string start end - (if (host-big-endian?) - source-utf32-be-char - source-utf32-le-char) - 'UTF32-STRING->WIDE-STRING)) + (if (host-big-endian?) + (utf32-be-string->wide-string string start end) + (utf32-le-string->wide-string string start end))) (define (utf32-be-string->wide-string string #!optional start end) - (utf-string->wide-string string start end source-utf32-be-char - 'UTF32-BE-STRING->WIDE-STRING)) + (%convert-string string start end + open-utf32-be-input-string + open-wide-output-string)) (define (utf32-le-string->wide-string string #!optional start end) - (utf-string->wide-string string start end source-utf32-le-char - 'UTF32-LE-STRING->WIDE-STRING)) - -(define (wide-string->utf32-string string #!optional start end) - (wide-string->utf-string string start end - (if (host-big-endian?) - sink-utf32-be-char - sink-utf32-le-char) - 'WIDE-STRING->UTF32-STRING)) - -(define (wide-string->utf32-be-string string #!optional start end) - (wide-string->utf-string string start end sink-utf32-be-char - 'WIDE-STRING->UTF32-BE-STRING)) - -(define (wide-string->utf32-le-string string #!optional start end) - (wide-string->utf-string string start end sink-utf32-le-char - 'WIDE-STRING->UTF32-LE-STRING)) + (%convert-string string start end + open-utf32-le-input-string + open-wide-output-string)) + +(define (string->utf32-string string #!optional start end) + (if (host-big-endian?) + (string->utf32-be-string string start end) + (string->utf32-le-string string start end))) + +(define (string->utf32-be-string string #!optional start end) + (%convert-string string start end + open-input-string + open-utf32-be-output-string)) + +(define (string->utf32-le-string string #!optional start end) + (%convert-string string start end + open-input-string + open-utf32-le-output-string)) (define (utf32-string-length string #!optional start end) (if (host-big-endian?) - (%utf32-string-length string start end "32BE" utf32-be-bytes->code-point - 'UTF32-STRING-LENGTH) - (%utf32-string-length string start end "32LE" utf32-le-bytes->code-point - 'UTF32-STRING-LENGTH))) + (utf32-be-string-length string start end) + (utf32-le-string-length string start end))) (define (utf32-be-string-length string #!optional start end) (%utf32-string-length string start end "32BE" utf32-be-bytes->code-point @@ -760,11 +764,9 @@ Not used at the moment. (validate-utf32-char string start end combiner))))) (define (utf32-string-valid? string #!optional start end) - (%utf32-string-valid? string start end - (if (host-big-endian?) - utf32-be-bytes->code-point - utf32-le-bytes->code-point) - 'UTF32-STRING-VALID?)) + (if (host-big-endian?) + (utf32-be-string-valid? string start end) + (utf32-le-string-valid? string start end))) (define (utf32-be-string-valid? string #!optional start end) (%utf32-string-valid? string start end utf32-be-bytes->code-point @@ -780,6 +782,18 @@ Not used at the moment. (lambda (string start end) (validate-utf32-char string start end combiner))))) +(define-integrable (utf32-be-bytes->code-point b0 b1 b2 b3) + (+ (* b0 #x01000000) + (fix:lsh b1 16) + (fix:lsh b2 8) + b3)) + +(define-integrable (utf32-le-bytes->code-point b0 b1 b2 b3) + (+ (* b3 #x01000000) + (fix:lsh b2 16) + (fix:lsh b1 8) + b0)) + (define (validate-utf32-char string start end combiner) (define-integrable (n i) @@ -809,92 +823,78 @@ Not used at the moment. ;;;; UTF-16 representation -(define (source-utf16-be-char source caller) - (source-utf16-char source be-bytes->digit16 caller)) - -(define (source-utf16-le-char source caller) - (source-utf16-char source le-bytes->digit16 caller)) - -(define-integrable (source-utf16-char source combinator caller) - (let ((d0 (source-utf16-digit source combinator))) - (and d0 - (integer->char - (if (high-surrogate? d0) - (let ((d1 (source-utf16-digit source combinator))) - (if (not d1) - (error "Truncated UTF-16 input.")) - (if (not (low-surrogate? d1)) - (error "Illegal UTF-16 subsequent digit:" d1)) - (combine-surrogates d0 d1)) - (begin - (if (illegal? d0) - (error:not-unicode-code-point d0 caller)) - d0)))))) - -(define-integrable (source-utf16-digit source combinator) - (let ((b0 (source))) - (and b0 - (let ((b1 (source))) - (if (not b1) - (error "Truncated UTF-16 input.")) - (combinator b0 b1))))) - -(define-integrable (sink-utf16-be-char char sink) - (sink-utf16-char char sink - (lambda (digit sink) - (sink (fix:lsh digit -8)) - (sink (fix:and digit #x00FF))))) - -(define-integrable (sink-utf16-le-char char sink) - (sink-utf16-char char sink - (lambda (digit sink) - (sink (fix:and digit #x00FF)) - (sink (fix:lsh digit -8))))) - -(define-integrable (sink-utf16-char char sink dissecter) - (let ((pt (char->integer char))) - (if (fix:< pt #x10000) - (dissecter pt sink) - (let ((s (fix:- pt #x10000))) - (dissecter (fix:or #xD800 (fix:lsh s -10)) sink) - (dissecter (fix:or #xDC00 (fix:and s #x3FF)) sink))))) - -(define (utf16-string->wide-string string #!optional start end) - (utf-string->wide-string string start end - (if (host-big-endian?) - source-utf16-be-char - source-utf16-le-char) - 'UTF16-STRING->WIDE-STRING)) +(define open-utf16-be-input-string + (coded-input-opener 'UTF-16BE)) -(define (utf16-be-string->wide-string string #!optional start end) - (utf-string->wide-string string start end source-utf16-be-char - 'UTF16-BE-STRING->WIDE-STRING)) +(define open-utf16-le-input-string + (coded-input-opener 'UTF-16LE)) -(define (utf16-le-string->wide-string string #!optional start end) - (utf-string->wide-string string start end source-utf16-le-char - 'UTF16-LE-STRING->WIDE-STRING)) +(define open-utf16-input-string + (ended-input-opener open-utf16-be-input-string + open-utf16-le-input-string)) + +(define call-with-utf16-be-input-string + (input-string-caller open-utf16-be-input-string)) -(define (wide-string->utf16-string string #!optional start end) - (wide-string->utf-string string start end - (if (host-big-endian?) - sink-utf16-be-char - sink-utf16-le-char) - 'WIDE-STRING->UTF16-STRING)) +(define call-with-utf16-le-input-string + (input-string-caller open-utf16-le-input-string)) -(define (wide-string->utf16-be-string string #!optional start end) - (wide-string->utf-string string start end sink-utf16-be-char - 'WIDE-STRING->UTF16-BE-STRING)) +(define call-with-utf16-input-string + (input-string-caller open-utf16-input-string)) -(define (wide-string->utf16-le-string string #!optional start end) - (wide-string->utf-string string start end sink-utf16-le-char - 'WIDE-STRING->UTF16-LE-STRING)) +(define open-utf16-be-output-string + (coded-output-opener 'UTF-16BE)) +(define open-utf16-le-output-string + (coded-output-opener 'UTF-16LE)) + +(define open-utf16-output-string + (ended-output-opener open-utf16-be-output-string + open-utf16-le-output-string)) + +(define call-with-utf16-be-output-string + (output-string-caller open-utf16-be-output-string)) + +(define call-with-utf16-le-output-string + (output-string-caller open-utf16-le-output-string)) + +(define call-with-utf16-output-string + (output-string-caller open-utf16-output-string)) + +(define (utf16-string->wide-string string #!optional start end) + (if (host-big-endian?) + (utf16-be-string->wide-string string start end) + (utf16-le-string->wide-string string start end))) + +(define (utf16-be-string->wide-string string #!optional start end) + (%convert-string string start end + open-utf16-be-input-string + open-wide-output-string)) + +(define (utf16-le-string->wide-string string #!optional start end) + (%convert-string string start end + open-utf16-le-input-string + open-wide-output-string)) + +(define (string->utf16-string string #!optional start end) + (if (host-big-endian?) + (string->utf16-be-string string start end) + (string->utf16-le-string string start end))) + +(define (string->utf16-be-string string #!optional start end) + (%convert-string string start end + open-input-string + open-utf16-be-output-string)) + +(define (string->utf16-le-string string #!optional start end) + (%convert-string string start end + open-input-string + open-utf16-le-output-string)) + (define (utf16-string-length string #!optional start end) (if (host-big-endian?) - (%utf16-string-length string start end "16BE" be-bytes->digit16 - 'UTF16-STRING-LENGTH) - (%utf16-string-length string start end "16LE" le-bytes->digit16 - 'UTF16-STRING-LENGTH))) + (utf16-be-string-length string start end) + (utf16-le-string-length string start end))) (define (utf16-be-string-length string #!optional start end) (%utf16-string-length string start end "16BE" be-bytes->digit16 @@ -909,13 +909,11 @@ Not used at the moment. (encoded-string-length string start end type caller (lambda (string start end) (validate-utf16-char string start end combiner))))) - + (define (utf16-string-valid? string #!optional start end) (if (host-big-endian?) - (%utf16-string-valid? string start end be-bytes->digit16 - 'UTF16-STRING-VALID?) - (%utf16-string-valid? string start end le-bytes->digit16 - 'UTF16-STRING-VALID?))) + (utf16-be-string-valid? string start end) + (utf16-le-string-valid? string start end))) (define (utf16-be-string-valid? string #!optional start end) (%utf16-string-valid? string start end be-bytes->digit16 @@ -930,7 +928,7 @@ Not used at the moment. (encoded-string-valid? string start end (lambda (string start end) (validate-utf16-char string start end combiner))))) - + (define (validate-utf16-char string start end combiner) (define-integrable (n i) @@ -947,10 +945,10 @@ Not used at the moment. (fix:+ start 2))))) start)) -(define-integrable (be-bytes->digit16 b0 b1) +(define (be-bytes->digit16 b0 b1) (fix:or (fix:lsh b0 8) b1)) -(define-integrable (le-bytes->digit16 b0 b1) +(define (le-bytes->digit16 b0 b1) (fix:or (fix:lsh b1 8) b0)) (define-integrable (high-surrogate? n) @@ -982,72 +980,32 @@ Not used at the moment. ;;;; UTF-8 representation -(define (source-utf8-char source caller) - (let ((b0 (source)) - (get-next - (lambda () - (let ((b (source))) - (if (not b) - (error "Truncated UTF-8 input.")) - (if (not (%valid-trailer? b)) - (error "Illegal subsequent UTF-8 byte:" b)) - b)))) - (and b0 - (integer->char - (cond ((fix:< b0 #x80) - b0) - ((fix:< b0 #xE0) - (%vc2 b0) - (%cp2 b0 (get-next))) - ((fix:< b0 #xF0) - (let ((b1 (get-next))) - (%vc3 b0 b1) - (let ((pt (%cp3 b0 b1 (get-next)))) - (if (illegal? pt) - (error:not-unicode-code-point pt caller)) - pt))) - ((fix:< b0 #xF8) - (let ((b1 (get-next))) - (%vc4 b0 b1) - (let ((b2 (get-next))) - (%cp4 b0 b1 b2 (get-next))))) - (else - (error "Illegal UTF-8 byte:" b0))))))) +(define open-utf8-input-string + (coded-input-opener 'UTF-8)) + +(define call-with-utf8-input-string + (input-string-caller open-utf8-input-string)) + +(define open-utf8-output-string + (coded-output-opener 'UTF-8)) + +(define call-with-utf8-output-string + (output-string-caller open-utf8-output-string)) + +(define (string->utf8-string string #!optional start end) + (%convert-string string start end + open-input-string + open-utf8-output-string)) + +(define (utf8-string->string string #!optional start end) + (%convert-string string start end + open-utf8-input-string + open-narrow-output-string)) (define (utf8-string->wide-string string #!optional start end) - (utf-string->wide-string string start end - source-utf8-char - 'UTF8-STRING->WIDE-STRING)) - -(define (sink-utf8-char char sink) - (let ((pt (char->integer char))) - - (define-integrable (initial-char n-bits offset) - (fix:or (fix:and (fix:lsh #xFF (fix:+ n-bits 1)) #xFF) - (fix:lsh pt (fix:- 0 offset)))) - - (define-integrable (subsequent-char offset) - (fix:or #x80 (fix:and (fix:lsh pt (fix:- 0 offset)) #x3F))) - - (cond ((fix:< pt #x00000080) - (sink pt)) - ((fix:< pt #x00000800) - (sink (initial-char 5 6)) - (sink (subsequent-char 0))) - ((fix:< pt #x00010000) - (sink (initial-char 4 12)) - (sink (subsequent-char 6)) - (sink (subsequent-char 0))) - (else - (sink (initial-char 3 18)) - (sink (subsequent-char 12)) - (sink (subsequent-char 6)) - (sink (subsequent-char 0)))))) - -(define (wide-string->utf8-string string #!optional start end) - (wide-string->utf-string string start end - sink-utf8-char - 'WIDE-STRING->UTF8-STRING)) + (%convert-string string start end + open-utf8-input-string + open-wide-output-string)) (define (utf8-string-length string #!optional start end) (with-substring-args string start end 'UTF8-STRING-LENGTH @@ -1063,48 +1021,6 @@ Not used at the moment. (utf8-string-valid? object))) (define-guarantee utf8-string "UTF-8 string") - -(define (string->utf8-string string #!optional start end) - (with-substring-args string start end 'STRING->UTF8-STRING - (let ((string* - (make-string - (fix:+ (fix:- end start) - (let loop ((i start) (n 0)) - (if (fix:< i end) - (loop (fix:+ i 1) - (if (fix:< (vector-8b-ref string i) #x80) - n - (fix:+ n 1))) - n)))))) - (let loop ((i start) (i* 0)) - (if (fix:< i end) - (if (fix:< (vector-8b-ref string i) #x80) - (begin - (vector-8b-set! string* i* (vector-8b-ref string i)) - (loop (fix:+ i 1) (fix:+ i* 1))) - (begin - (vector-8b-set! - string* - i* - (fix:or #xC0 (fix:lsh (vector-8b-ref string i) -6))) - (vector-8b-set! - string* - (fix:+ i* 1) - (fix:or #x80 (fix:and (vector-8b-ref string i) #x3F))) - (loop (fix:+ i 1) (fix:+ i* 2)))))) - string*))) - -(define (utf8-string->string string #!optional start end) - (let ((input (open-input-string string start end))) - (port/set-coding input 'UTF-8) - (call-with-output-string - (lambda (output) - (let loop () - (let ((c (read-char input))) - (if (not (eof-object? c)) - (begin - (write-char c output) - (loop))))))))) (define (validate-utf8-char string start end) @@ -1198,17 +1114,23 @@ Not used at the moment. (else (loop))))))) (define (open-string string start end coding caller) - (cond ((string? string) - (let ((port (open-input-string string start end))) - (if (not (default-object? coding)) - (port/set-coding port coding)) - port)) - ((wide-string? string) - (if (not (default-object? coding)) - (error "Coding not allowed with wide strings:" coding)) - (open-wide-input-string string start end)) - (else - (error:wrong-type-argument string "string" caller)))) + ((cond ((default-object? coding) + open-input-string) + ((string? string) + (case coding + ((UTF-8) open-utf8-input-string) + ((UTF-16) open-utf16-input-string) + ((UTF-16BE) open-utf16-be-input-string) + ((UTF-16LE) open-utf16-le-input-string) + ((UTF-32) open-utf32-input-string) + ((UTF-32BE) open-utf32-be-input-string) + ((UTF-32LE) open-utf32-le-input-string) + (else (error:bad-range-argument coding caller)))) + ((wide-string? string) + (error:bad-range-argument coding caller)) + (else + (error:wrong-type-argument string "string" caller))) + string start end)) (define (alphabet-predicate alphabet) (cond ((alphabet? alphabet) @@ -1216,231 +1138,4 @@ Not used at the moment. ((char-set? alphabet) (lambda (char) (char-set-member? alphabet char))) (else - (error:not-alphabet alphabet 'ALPHABET-PREDICATE)))) - -;;;; Wide string ports - -(define open-wide-output-string) -(define open-wide-input-string) - -(define (initialize-package!) - (set! open-wide-output-string - (let ((type - (make-port-type - `((WRITE-CHAR - ,(lambda (port char) - (guarantee-wide-char char 'WRITE-CHAR) - ((port/state port) char) - 1)) - (EXTRACT-OUTPUT - ,(lambda (port) - (%make-wide-string - (get-output-objects (port/state port))))) - (EXTRACT-OUTPUT! - ,(lambda (port) - (%make-wide-string - (get-output-objects! (port/state port))))) - (WRITE-SELF - ,(lambda (port port*) - port - (write-string " to wide string" port*)))) - #f))) - (lambda () - (make-port type (open-output-object-buffer))))) - (set! open-wide-input-string - (let ((type - (make-port-type - `((PEEK-CHAR - ,(lambda (port) - (or ((port/state port) 'PEEK) - (eof-object)))) - (READ-CHAR - ,(lambda (port) - (or ((port/state port) 'READ) - (eof-object)))) - (UNREAD-CHAR - ,(lambda (port) - ((port/state port) 'UNREAD))) - (WRITE-SELF - ,(lambda (port output-port) - port - (write-string " from wide string" output-port)))) - #f))) - (lambda (string #!optional start end) - (guarantee-wide-string string 'OPEN-WIDE-INPUT-STRING) - (make-port type - (open-input-object-buffer (wide-string-contents string) - start - end - 'OPEN-WIDE-INPUT-STRING))))) - unspecific) - -(define (call-with-wide-output-string generator) - (let ((port (open-wide-output-string))) - (generator port) - (get-output-string port))) - -(define (utf-string->wide-string string start end source-char caller) - (let ((source (open-input-byte-buffer string start end caller))) - (%make-wide-string - (call-with-output-object-buffer - (lambda (sink) - (let loop () - (let ((char (source-char source caller))) - (if char - (begin - (sink char) - (loop)))))))))) - -(define (wide-string->utf-string string start end sink-char caller) - (let ((source - (open-input-object-buffer (wide-string-contents string) start end - caller))) - (call-with-output-byte-buffer - (lambda (sink) - (let loop () - (let ((char (source 'READ))) - (if char - (begin - (sink-char char sink) - (loop))))))))) - -;;;; Byte buffers - -(define (open-output-byte-buffer) - (let ((bytes #f) - (index)) - (lambda (byte) - (case byte - ((EXTRACT-OUTPUT) - (if bytes - (string-head bytes index) - (make-string 0))) - ((EXTRACT-OUTPUT!) - (without-interrupts - (lambda () - (if bytes - (let ((bytes* bytes)) - (set! bytes #f) - (set-string-maximum-length! bytes* index) - bytes*) - (make-string 0))))) - (else - (without-interrupts - (lambda () - (cond ((not bytes) - (set! bytes (make-string 128)) - (set! index 0)) - ((not (fix:< index (string-length bytes))) - (let ((bytes* - (make-string (fix:* (string-length bytes) 2)))) - (string-move! bytes bytes* 0) - (set! bytes bytes*)))) - (vector-8b-set! bytes index byte) - (set! index (fix:+ index 1)) - unspecific))))))) - -(define (get-output-bytes buffer) (buffer 'EXTRACT-OUTPUT)) -(define (get-output-bytes! buffer) (buffer 'EXTRACT-OUTPUT!)) - -(define (call-with-output-byte-buffer generator) - (let ((buffer (open-output-byte-buffer))) - (generator buffer) - (get-output-bytes buffer))) - -(define (open-input-byte-buffer bytes start end caller) - (let* ((end - (if (if (default-object? end) #f end) - (guarantee-limited-index end (string-length bytes) caller) - (string-length bytes))) - (index - (if (if (default-object? start) #f start) - (guarantee-limited-index start end caller) - 0))) - (lambda () - (without-interrupts - (lambda () - (and (fix:< index end) - (let ((byte (vector-8b-ref bytes index))) - (set! index (fix:+ index 1)) - byte))))))) - -;;;; Object buffers - -(define (open-output-object-buffer) - (let ((objects #f) - (index)) - (lambda (object) - (cond ((eq? object extract-output-tag) - (if objects - (vector-head objects index) - (make-vector 0))) - ((eq? object extract-output!-tag) - (without-interrupts - (lambda () - (if objects - (let ((objects* objects)) - (set! objects #f) - (if (fix:< index (vector-length objects*)) - (vector-head objects* index) - objects*)) - (make-vector 0))))) - (else - (without-interrupts - (lambda () - (cond ((not objects) - (set! objects (make-vector 128)) - (set! index 0)) - ((not (fix:< index (vector-length objects))) - (set! objects - (vector-grow objects - (fix:* (vector-length objects) 2))))) - (vector-set! objects index object) - (set! index (fix:+ index 1)) - unspecific))))))) - -(define (get-output-objects buffer) (buffer extract-output-tag)) -(define (get-output-objects! buffer) (buffer extract-output!-tag)) - -(define extract-output-tag (list 'EXTRACT-OUTPUT)) -(define extract-output!-tag (list 'EXTRACT-OUTPUT!)) - -(define (call-with-output-object-buffer generator) - (let ((buffer (open-output-object-buffer))) - (generator buffer) - (get-output-objects buffer))) - -(define (open-input-object-buffer objects start end caller) - (let* ((end - (if (if (default-object? end) #f end) - (guarantee-limited-index end (vector-length objects) caller) - (vector-length objects))) - (index - (if (if (default-object? start) #f start) - (guarantee-limited-index start end caller) - 0))) - (lambda (operation) - (without-interrupts - (lambda () - (case operation - ((PEEK) - (and (fix:< index end) - (vector-ref objects index))) - ((READ) - (and (fix:< index end) - (let ((object (vector-ref objects index))) - (set! index (fix:+ index 1)) - object))) - ((UNREAD) - (if (not (fix:< start index)) - (error "No char to unread.")) - (set! index (fix:- index 1)) - unspecific) - (else - (error "Unknown operation:" operation)))))))) - -(define (guarantee-limited-index index limit caller) - (guarantee-index-fixnum index caller) - (if (not (fix:<= index limit)) - (error:bad-range-argument index caller)) - index) \ No newline at end of file + (error:not-alphabet alphabet 'ALPHABET-PREDICATE)))) \ No newline at end of file diff --git a/v7/src/runtime/url.scm b/v7/src/runtime/url.scm index fec8b0546..9f91df4f2 100644 --- a/v7/src/runtime/url.scm +++ b/v7/src/runtime/url.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: url.scm,v 1.53 2008/01/30 20:02:37 cph Exp $ +$Id: url.scm,v 1.54 2008/07/19 01:41:17 cph Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -467,11 +467,9 @@ USA. ;; works on ISO 8859-1 strings, and we are using UTF-8 strings. (define (uri-string-downcase string) - (call-with-output-string + (call-with-utf8-output-string (lambda (output) - (port/set-coding output 'UTF-8) - (let ((input (open-input-string string))) - (port/set-coding input 'UTF-8) + (let ((input (open-utf8-input-string string))) (let loop () (let ((char (read-char input))) (if (not (eof-object? char)) diff --git a/v7/src/ssp/mod-lisp.scm b/v7/src/ssp/mod-lisp.scm index e462e25a4..40b1902cb 100644 --- a/v7/src/ssp/mod-lisp.scm +++ b/v7/src/ssp/mod-lisp.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: mod-lisp.scm,v 1.38 2008/01/30 20:02:40 cph Exp $ +$Id: mod-lisp.scm,v 1.39 2008/07/19 01:41:17 cph Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -385,7 +385,7 @@ USA. (set-status-header response code) (set-content-type-header response 'text/html) (set-entity response - (call-with-output-string + (call-with-output-bytes (lambda (port) (write-xml (let ((message (status-message code))) diff --git a/v7/src/ssp/xmlrpc.scm b/v7/src/ssp/xmlrpc.scm index e03d1041a..049e2998f 100644 --- a/v7/src/ssp/xmlrpc.scm +++ b/v7/src/ssp/xmlrpc.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: xmlrpc.scm,v 1.16 2008/01/30 20:02:40 cph Exp $ +$Id: xmlrpc.scm,v 1.17 2008/07/19 01:41:17 cph Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -33,7 +33,7 @@ USA. (if (eq? (http-request-method) 'post) (let ((entity (http-request-entity))) (if entity - (let ((document (read-xml (open-input-string entity)))) + (let ((document (read-xml (open-input-bytes entity)))) (if document (write-xml (process-xmlrpc-request document pathname) port) (http-status-response 400 "Ill-formed XML entity"))) diff --git a/v7/src/xml/rdf-nt.scm b/v7/src/xml/rdf-nt.scm index d12bd6320..c098358ed 100644 --- a/v7/src/xml/rdf-nt.scm +++ b/v7/src/xml/rdf-nt.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: rdf-nt.scm,v 1.15 2008/01/30 20:02:42 cph Exp $ +$Id: rdf-nt.scm,v 1.16 2008/07/19 01:41:17 cph Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -121,7 +121,7 @@ USA. (*parser (map intern (match match-language)))) (define (parse-string b) - (let ((port (open-output-string))) + (let ((port (open-utf8-output-string))) (define (loop) (let ((p (get-parser-buffer-pointer b))) @@ -164,7 +164,6 @@ USA. (loop (fix:+ i 1))) #t))) - (port/set-coding port 'UTF-8) (loop))) (define match-ws* @@ -230,8 +229,7 @@ USA. (write-string (symbol-name lang) port))))) (define (write-rdf/nt-literal-text text port) - (let ((text (open-input-string text))) - (port/set-coding text 'UTF-8) + (let ((text (open-utf8-input-string text))) (write-string "\"" port) (let loop () (let ((char (read-char text))) diff --git a/v7/src/xml/turtle.scm b/v7/src/xml/turtle.scm index 00101437f..eb5898c72 100644 --- a/v7/src/xml/turtle.scm +++ b/v7/src/xml/turtle.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: turtle.scm,v 1.43 2008/01/30 20:02:42 cph Exp $ +$Id: turtle.scm,v 1.44 2008/07/19 01:41:17 cph Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -328,7 +328,7 @@ USA. (define (delimited-region-parser name start-delim end-delim alphabet parse-escapes) (lambda (buffer) - (let ((output (open-output-string)) + (let ((output (open-utf8-output-string)) (start (get-parser-buffer-pointer buffer))) (define (read-head) @@ -373,7 +373,6 @@ USA. (define (finish) (vector (get-output-string output))) - (port/set-coding output 'UTF-8) (and (match-parser-buffer-string buffer start-delim) (read-head))))) @@ -771,18 +770,18 @@ USA. (else #f)))) ((rdf-bnode? o) (and (not (inline-bnode o)) - (call-with-output-string + (call-with-utf8-output-string (lambda (port) (write-rdf/nt-bnode o port))))) ((uri? o) - (call-with-output-string + (call-with-utf8-output-string (lambda (port*) (write-uri o (port/rdf-prefix-registry port) port*)))) ((rdf-graph? o) (and (null? (rdf-graph-triples o)) "{}")) ((rdf-literal? o) - (call-with-output-string + (call-with-utf8-output-string (lambda (port) (write-rdf/turtle-literal o port)))) (else @@ -917,8 +916,7 @@ USA. (define (write-literal-text text port) (if (string-find-next-char text #\newline) - (let ((tport (open-input-string text))) - (port/set-coding tport 'UTF-8) + (let ((tport (open-utf8-input-string text))) (write-string "\"\"\"" port) (let loop () (let ((char (read-char tport))) diff --git a/v7/src/xml/xml-output.scm b/v7/src/xml/xml-output.scm index 325552748..da27f715d 100644 --- a/v7/src/xml/xml-output.scm +++ b/v7/src/xml/xml-output.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: xml-output.scm,v 1.43 2008/01/30 20:02:42 cph Exp $ +$Id: xml-output.scm,v 1.44 2008/07/19 01:41:17 cph Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -40,7 +40,7 @@ USA. (write-xml-1 xml port options)))) (define (xml->string xml . options) - (call-with-output-string + (call-with-output-bytes (lambda (port) (set-coding xml port) (write-xml-1 xml port options)))) @@ -501,8 +501,7 @@ USA. (emit-char char ctx)))))) (define (for-each-wide-char string procedure) - (let ((port (open-input-string string))) - (port/set-coding port 'UTF-8) + (let ((port (open-utf8-input-string string))) (let loop () (let ((char (read-char port))) (if (not (eof-object? char)) diff --git a/v7/src/xml/xml-parser.scm b/v7/src/xml/xml-parser.scm index 9eee6448e..c7a5ea0aa 100644 --- a/v7/src/xml/xml-parser.scm +++ b/v7/src/xml/xml-parser.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: xml-parser.scm,v 1.78 2008/01/30 20:02:42 cph Exp $ +$Id: xml-parser.scm,v 1.79 2008/07/19 01:41:18 cph Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -694,9 +694,8 @@ USA. (let ((char (integer->char n))) (if (not (char-in-alphabet? char alphabet:xml-char)) (perror p "Disallowed Unicode character" char)) - (call-with-output-string + (call-with-utf8-output-string (lambda (port) - (port/set-coding port 'UTF-8) (write-char char port)))))))) (*parser (with-pointer p @@ -841,7 +840,7 @@ USA. ;;;; Normalization (define (normalize-attribute-value string) - (call-with-output-string + (call-with-utf8-output-string (lambda (port) (let normalize-string ((string string)) (let ((b (utf8-string->parser-buffer (normalize-line-endings string)))) @@ -875,7 +874,7 @@ USA. (loop)))))))))) (define (trim-attribute-whitespace string) - (call-with-output-string + (call-with-utf8-output-string (lambda (port) (let ((string (string-trim string))) (let ((end (string-length string))) diff --git a/v7/src/xml/xml-struct.scm b/v7/src/xml/xml-struct.scm index b6cef184f..652086c22 100644 --- a/v7/src/xml/xml-struct.scm +++ b/v7/src/xml/xml-struct.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: xml-struct.scm,v 1.59 2008/01/30 20:02:42 cph Exp $ +$Id: xml-struct.scm,v 1.60 2008/07/19 01:41:18 cph Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -169,9 +169,8 @@ USA. (define (canonicalize-char-data object) (cond ((wide-char? object) - (call-with-output-string + (call-with-utf8-output-string (lambda (port) - (port/set-coding port 'UTF-8) (write-char object port)))) ((wide-string? object) (wide-string->utf8-string object)) @@ -485,7 +484,7 @@ USA. (define (xml-stylesheet . items) (make-xml-processing-instructions 'xml-stylesheet - (call-with-output-string + (call-with-utf8-output-string (lambda (port) (for-each (lambda (attr) (write-char #\space port) diff --git a/v7/src/xml/xpath.scm b/v7/src/xml/xpath.scm index 8c3a15b35..6531be651 100644 --- a/v7/src/xml/xpath.scm +++ b/v7/src/xml/xpath.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: xpath.scm,v 1.7 2008/01/30 20:02:43 cph Exp $ +$Id: xpath.scm,v 1.8 2008/07/19 01:41:18 cph Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -181,7 +181,7 @@ USA. (xml-element-name (node-item node))) (define-method node-string ((node )) - (call-with-output-string + (call-with-utf8-output-string (lambda (port) (let loop ((node node)) (stream-for-each (lambda (child) -- 2.25.1