From: Chris Hanson Date: Wed, 26 May 2004 17:05:56 +0000 (+0000) Subject: Add procedures to do output directly to UTF-xx strings. X-Git-Tag: 20090517-FFI~1648 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=8ccc8386a4087652187be4114c3746e0b0941027;p=mit-scheme.git Add procedures to do output directly to UTF-xx strings. --- diff --git a/v7/src/runtime/runtime.pkg b/v7/src/runtime/runtime.pkg index 8a9987674..11e586743 100644 --- a/v7/src/runtime/runtime.pkg +++ b/v7/src/runtime/runtime.pkg @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: runtime.pkg,v 14.483 2004/05/26 15:20:22 cph Exp $ +$Id: runtime.pkg,v 14.484 2004/05/26 17:05:22 cph Exp $ Copyright 1988,1989,1990,1991,1992,1993 Massachusetts Institute of Technology Copyright 1994,1995,1996,1997,1998,1999 Massachusetts Institute of Technology @@ -4504,6 +4504,13 @@ USA. alphabet->code-points alphabet->string alphabet? + call-with-utf16-be-output-string + call-with-utf16-le-output-string + call-with-utf16-output-string + call-with-utf32-be-output-string + call-with-utf32-le-output-string + call-with-utf32-output-string + call-with-utf8-output-string call-with-wide-output-string char-in-alphabet? char-set->alphabet @@ -4517,6 +4524,13 @@ USA. guarantee-wide-string-index guarantee-wide-substring make-wide-string + open-utf16-be-output-string + open-utf16-le-output-string + open-utf16-output-string + open-utf32-be-output-string + open-utf32-le-output-string + open-utf32-output-string + open-utf8-output-string open-wide-input-string open-wide-output-string read-utf16-be-char diff --git a/v7/src/runtime/unicode.scm b/v7/src/runtime/unicode.scm index e189c2c1c..379b76ae6 100644 --- a/v7/src/runtime/unicode.scm +++ b/v7/src/runtime/unicode.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: unicode.scm,v 1.15 2004/02/23 20:50:33 cph Exp $ +$Id: unicode.scm,v 1.16 2004/05/26 17:05:56 cph Exp $ Copyright 2001,2003,2004 Massachusetts Institute of Technology @@ -127,12 +127,19 @@ USA. (error "Illegal input byte:" b)) b)))) -(define-integrable (write-byte byte port) - (write-char (integer->char byte) port)) +(define (port->byte-sink port) + (lambda (byte) + (write-char (integer->char byte) port))) + +(define ((call-with-output-string-constructor open-output-string) generator) + (let ((port (open-output-string))) + (generator port) + (get-output-string port))) (define (initialize-package!) (initialize-output-port!) (initialize-input-port!) + (initialize-utf-output-ports!) unspecific) ;;;; Unicode characters @@ -248,30 +255,29 @@ USA. (%code-points->alphabet items)) (define (%code-points->alphabet items) - (call-with-values (lambda () (split-list items #x800)) - (lambda (low-items high-items) - (let ((low (make-alphabet-low))) - (for-each (lambda (item) - (if (pair? item) - (do ((i (car item) (fix:+ i 1))) - ((fix:> i (cdr item))) - (alphabet-low-set! low i)) - (alphabet-low-set! low item))) - low-items) - (let ((n-high (length high-items))) - (let ((high1 (make-vector n-high)) - (high2 (make-vector n-high))) - (do ((items high-items (cdr items)) - (i 0 (fix:+ i 1))) - ((not (pair? items))) - (if (pair? (car items)) - (begin - (vector-set! high1 i (caar items)) - (vector-set! high2 i (cdar items))) - (begin - (vector-set! high1 i (car items)) - (vector-set! high2 i (car items))))) - (make-alphabet low high1 high2))))))) + (receive (low-items high-items) (split-list items #x800) + (let ((low (make-alphabet-low))) + (for-each (lambda (item) + (if (pair? item) + (do ((i (car item) (fix:+ i 1))) + ((fix:> i (cdr item))) + (alphabet-low-set! low i)) + (alphabet-low-set! low item))) + low-items) + (let ((n-high (length high-items))) + (let ((high1 (make-vector n-high)) + (high2 (make-vector n-high))) + (do ((items high-items (cdr items)) + (i 0 (fix:+ i 1))) + ((not (pair? items))) + (if (pair? (car items)) + (begin + (vector-set! high1 i (caar items)) + (vector-set! high2 i (cdar items))) + (begin + (vector-set! high1 i (car items)) + (vector-set! high2 i (car items))))) + (make-alphabet low high1 high2)))))) (define (split-list items limit) (let loop ((items items) (low '())) @@ -415,16 +421,14 @@ USA. (reduce alphabet+2 null-alphabet alphabets)) (define (alphabet+2 a1 a2) - (call-with-values - (lambda () - (alphabet-high+2 (alphabet-high1 a1) - (alphabet-high2 a1) - (alphabet-high1 a2) - (alphabet-high2 a2))) - (lambda (high1 high2) - (make-alphabet (alphabet-low+2 (alphabet-low a1) (alphabet-low a2)) - high1 - high2)))) + (receive (high1 high2) + (alphabet-high+2 (alphabet-high1 a1) + (alphabet-high2 a1) + (alphabet-high1 a2) + (alphabet-high2 a2)) + (make-alphabet (alphabet-low+2 (alphabet-low a1) (alphabet-low a2)) + high1 + high2))) (define (alphabet-low+2 low1 low2) (let ((low (make-alphabet-low))) @@ -471,16 +475,14 @@ USA. (values lower upper)))))) (define (alphabet- a1 a2) - (call-with-values - (lambda () - (alphabet-high- (alphabet-high1 a1) - (alphabet-high2 a1) - (alphabet-high1 a2) - (alphabet-high2 a2))) - (lambda (high1 high2) - (make-alphabet (alphabet-low- (alphabet-low a1) (alphabet-low a2)) - high1 - high2)))) + (receive (high1 high2) + (alphabet-high- (alphabet-high1 a1) + (alphabet-high2 a1) + (alphabet-high1 a2) + (alphabet-high2 a2)) + (make-alphabet (alphabet-low- (alphabet-low a1) (alphabet-low a2)) + high1 + high2))) (define (alphabet-low- low1 low2) (let ((low (make-alphabet-low))) @@ -620,53 +622,52 @@ USA. (guarantee-substring-end-index end (%wide-string-length string) caller) (guarantee-substring-start-index start end caller)) -(define (call-with-wide-output-string generator) - (let ((port (open-wide-output-string))) - (generator port) - (get-output-string port))) - -(define (open-wide-output-string) - (make-port ws-output-port-type - (let ((v (make-vector 17))) - (vector-set! v 0 0) - v))) +(define open-wide-output-string) +(define call-with-wide-output-string) -(define ws-output-port-type) (define (initialize-output-port!) - (set! ws-output-port-type - (make-port-type - `((WRITE-CHAR - ,(lambda (port char) - (guarantee-wide-char char 'WRITE-CHAR) - (without-interrupts - (lambda () - (let* ((v (port/state port)) - (n (fix:+ (vector-ref v 0) 1))) - (if (fix:< n (vector-length v)) - (begin - (vector-set! v n char) - (vector-set! v 0 n)) - (let ((v - (vector-grow v - (fix:- (fix:* (vector-length v) 2) - 1)))) - (vector-set! v n char) - (vector-set! v 0 n) - (set-port/state! port v) - v))))) - 1)) - (EXTRACT-OUTPUT! - ,(lambda (port) - (%make-wide-string - (without-interrupts - (lambda () - (let ((v (port/state port))) - (subvector v 1 (fix:+ (vector-ref v 0) 1)))))))) - (WRITE-SELF - ,(lambda (port port*) - port - (write-string " to wide string" port*)))) - #f)) + (set! open-wide-output-string + (let ((type + (make-port-type + `((WRITE-CHAR + ,(lambda (port char) + (guarantee-wide-char char 'WRITE-CHAR) + (without-interrupts + (lambda () + (let* ((v (port/state port)) + (n (fix:+ (vector-ref v 0) 1))) + (if (fix:< n (vector-length v)) + (begin + (vector-set! v n char) + (vector-set! v 0 n)) + (let ((v + (vector-grow v + (fix:- (fix:* (vector-length v) 2) + 1)))) + (vector-set! v n char) + (vector-set! v 0 n) + (set-port/state! port v) + v))))) + 1)) + (EXTRACT-OUTPUT! + ,(lambda (port) + (%make-wide-string + (without-interrupts + (lambda () + (let ((v (port/state port))) + (subvector v 1 (fix:+ (vector-ref v 0) 1)))))))) + (WRITE-SELF + ,(lambda (port port*) + port + (write-string " to wide string" port*)))) + #f))) + (lambda () + (make-port type + (let ((v (make-vector 17))) + (vector-set! v 0 0) + v))))) + (set! call-with-wide-output-string + (call-with-output-string-constructor open-wide-output-string)) unspecific) (define (string->wide-string string #!optional start end) @@ -795,25 +796,25 @@ USA. (define (write-utf32-be-char char port) (guarantee-wide-char char 'WRITE-UTF32-BE-CHAR) - (%write-utf32-be-char char port)) + (sink-utf32-be-char char (port->byte-sink port))) (define (write-utf32-le-char char port) (guarantee-wide-char char 'WRITE-UTF32-LE-CHAR) - (%write-utf32-le-char char port)) + (sink-utf32-le-char char (port->byte-sink port))) -(define-integrable (%write-utf32-be-char char port) +(define-integrable (sink-utf32-be-char char sink) (let ((pt (char->integer char))) - (write-byte 0 port) - (write-byte (fix:lsh pt -16) port) - (write-byte (fix:lsh pt -8) port) - (write-byte (fix:and pt #xFF) port))) + (sink 0) + (sink (fix:lsh pt -16)) + (sink (fix:lsh pt -8)) + (sink (fix:and pt #xFF)))) -(define-integrable (%write-utf32-le-char char port) +(define-integrable (sink-utf32-le-char char sink) (let ((pt (char->integer char))) - (write-byte (fix:and pt #xFF) port) - (write-byte (fix:lsh pt -8) port) - (write-byte (fix:lsh pt -16) port) - (write-byte 0 port))) + (sink (fix:and pt #xFF)) + (sink (fix:lsh pt -8)) + (sink (fix:lsh pt -16)) + (sink 0))) (define (utf32-string->wide-string string #!optional start end) (%utf32-string->wide-string string @@ -851,30 +852,30 @@ USA. (if (default-object? start) #f start) (if (default-object? end) #f end) (if (host-big-endian?) - %write-utf32-be-char - %write-utf32-le-char))) + sink-utf32-be-char + sink-utf32-le-char))) (define (wide-string->utf32-be-string string #!optional start end) (%wide-string->utf32-string string (if (default-object? start) #f start) (if (default-object? end) #f end) - %write-utf32-be-char)) + sink-utf32-be-char)) (define (wide-string->utf32-le-string string #!optional start end) (%wide-string->utf32-string string (if (default-object? start) #f start) (if (default-object? end) #f end) - %write-utf32-le-char)) + sink-utf32-le-char)) -(define (%wide-string->utf32-string string start end write-utf32-char) +(define (%wide-string->utf32-string string start end sink-utf32-char) (let ((input (open-wide-input-string string start end))) - (call-with-output-string - (lambda (output) + (call-with-output-byte-buffer + (lambda (sink) (let loop () (let ((char (read-char input))) (if (not (eof-object? char)) (begin - (write-utf32-char char output) + (sink-utf32-char char sink) (loop))))))))) (define (utf32-string-length string #!optional start end) @@ -1001,32 +1002,31 @@ USA. (define (write-utf16-be-char char port) (guarantee-wide-char char 'WRITE-UTF16-BE-CHAR) - (%write-utf16-be-char char port)) + (sink-utf16-be-char char (port->byte-sink port))) (define (write-utf16-le-char char port) (guarantee-wide-char char 'WRITE-UTF16-LE-CHAR) - (%write-utf16-le-char char port)) - -(define-integrable (%write-utf16-be-char char port) - (%write-utf16-char char port - (lambda (digit output) - (output (fix:lsh digit -8)) - (output (fix:and digit #x00FF))))) - -(define-integrable (%write-utf16-le-char char port) - (%write-utf16-char char port - (lambda (digit output) - (output (fix:and digit #x00FF)) - (output (fix:lsh digit -8))))) - -(define-integrable (%write-utf16-char char port dissecter) - (let ((pt (char->integer char)) - (write-byte (lambda (byte) (write-byte byte port)))) + (sink-utf16-le-char char (port->byte-sink port))) + +(define-integrable (sink-utf16-be-char char sink) + (sink-utf16-char char sink + (lambda (digit sink) + (sink (fix:lsh digit -8)) + (sink (fix:and digit #x00FF))))) + +(define-integrable (sink-utf16-le-char char sink) + (sink-utf16-char char sink + (lambda (digit sink) + (sink (fix:and digit #x00FF)) + (sink (fix:lsh digit -8))))) + +(define-integrable (sink-utf16-char char sink dissecter) + (let ((pt (char->integer char))) (if (fix:< pt #x10000) - (dissecter pt write-byte) + (dissecter pt sink) (let ((s (fix:- pt #x10000))) - (dissecter (fix:or #xD800 (fix:lsh s -10)) write-byte) - (dissecter (fix:or #xDC00 (fix:and s #x3FF)) write-byte))))) + (dissecter (fix:or #xD800 (fix:lsh s -10)) sink) + (dissecter (fix:or #xDC00 (fix:and s #x3FF)) sink))))) (define (utf16-string->wide-string string #!optional start end) (%utf16-string->wide-string string @@ -1064,30 +1064,30 @@ USA. (if (default-object? start) #f start) (if (default-object? end) #f end) (if (host-big-endian?) - %write-utf16-be-char - %write-utf16-le-char))) + sink-utf16-be-char + sink-utf16-le-char))) (define (wide-string->utf16-be-string string #!optional start end) (%wide-string->utf16-string string (if (default-object? start) #f start) (if (default-object? end) #f end) - %write-utf16-be-char)) + sink-utf16-be-char)) (define (wide-string->utf16-le-string string #!optional start end) (%wide-string->utf16-string string (if (default-object? start) #f start) (if (default-object? end) #f end) - %write-utf16-le-char)) + sink-utf16-le-char)) -(define (%wide-string->utf16-string string start end write-utf16-char) +(define (%wide-string->utf16-string string start end sink-utf16-char) (let ((input (open-wide-input-string string start end))) - (call-with-output-string - (lambda (output) + (call-with-output-byte-buffer + (lambda (sink) (let loop () (let ((char (read-char input))) (if (not (eof-object? char)) (begin - (write-utf16-char char output) + (sink-utf16-char char sink) (loop))))))))) (define (utf16-string-length string #!optional start end) @@ -1248,9 +1248,9 @@ USA. (define (write-utf8-char char port) (guarantee-wide-char char 'WRITE-UTF8-CHAR) - (%write-utf8-char char port)) + (sink-utf8-char char (port->byte-sink port))) -(define (%write-utf8-char char port) +(define (sink-utf8-char char sink) (let ((pt (char->integer char))) (define-integrable (initial-char n-bits offset) @@ -1261,32 +1261,32 @@ USA. (fix:or #x80 (fix:and (fix:lsh pt (fix:- 0 offset)) #x3F))) (cond ((fix:< pt #x00000080) - (write-byte pt port)) + (sink pt)) ((fix:< pt #x00000800) - (write-byte (initial-char 5 6) port) - (write-byte (subsequent-char 0) port)) + (sink (initial-char 5 6)) + (sink (subsequent-char 0))) ((fix:< pt #x00010000) - (write-byte (initial-char 4 12) port) - (write-byte (subsequent-char 6) port) - (write-byte (subsequent-char 0) port)) + (sink (initial-char 4 12)) + (sink (subsequent-char 6)) + (sink (subsequent-char 0))) (else - (write-byte (initial-char 3 18) port) - (write-byte (subsequent-char 12) port) - (write-byte (subsequent-char 6) port) - (write-byte (subsequent-char 0) port))))) + (sink (initial-char 3 18)) + (sink (subsequent-char 12)) + (sink (subsequent-char 6)) + (sink (subsequent-char 0)))))) (define (wide-string->utf8-string string #!optional start end) (let ((input (open-wide-input-string string (if (default-object? start) #f start) (if (default-object? end) #f end)))) - (call-with-output-string - (lambda (output) + (call-with-output-byte-buffer + (lambda (sink) (let loop () (let ((char (read-char input))) (if (not (eof-object? char)) (begin - (%write-utf8-char char output) + (sink-utf8-char char sink) (loop))))))))) (define (utf8-string-length string #!optional start end) @@ -1368,4 +1368,108 @@ USA. (fix:and b3 #x3F))))) (define-integrable (%valid-trailer? n) - (fix:= #x80 (fix:and #xC0 n))) \ No newline at end of file + (fix:= #x80 (fix:and #xC0 n))) + +(define open-utf8-output-string) +(define call-with-utf8-output-string) +(define open-utf16-output-string) +(define call-with-utf16-output-string) +(define open-utf16-be-output-string) +(define call-with-utf16-be-output-string) +(define open-utf16-le-output-string) +(define call-with-utf16-le-output-string) +(define open-utf32-output-string) +(define call-with-utf32-output-string) +(define open-utf32-be-output-string) +(define call-with-utf32-be-output-string) +(define open-utf32-le-output-string) +(define call-with-utf32-le-output-string) + +(define (initialize-utf-output-ports!) + (let ((make-opener + (lambda (sink-char coding-name) + (let ((type + (make-port-type + `((WRITE-CHAR + ,(lambda (port char) + (guarantee-wide-char char 'WRITE-CHAR) + (sink-char char (port/state port)) + 1)) + (EXTRACT-OUTPUT! + ,(lambda (port) + (get-output-bytes (port/state port)))) + (WRITE-SELF + ,(let ((description + (string-append " to " coding-name " string"))) + (lambda (port port*) + port + (write-string description port*))))) + #f))) + (lambda () + (make-port type (open-output-byte-buffer))))))) + (let-syntax + ((define-openers + (sc-macro-transformer + (lambda (form environment) + (if (syntax-match? '(SYMBOL DATUM expression) (cdr form)) + (let ((n0 (symbol-append (cadr form) '-OUTPUT-STRING))) + (let ((n1 (symbol-append 'OPEN- n0)) + (n2 (symbol-append 'CALL-WITH- n0))) + `(BEGIN + (SET! ,n1 + (MAKE-OPENER ,(cadddr form) ,(caddr form))) + (SET! ,n2 + (CALL-WITH-OUTPUT-STRING-CONSTRUCTOR ,n1))))) + (ill-formed-syntax form)))))) + + (define-openers utf8 "UTF-8" sink-utf8-char) + + (define-openers utf16 "UTF-16" + (if (host-big-endian?) + sink-utf16-be-char + sink-utf16-le-char)) + (define-openers utf16-be "UTF-16BE" sink-utf16-be-char) + (define-openers utf16-le "UTF-16LE" sink-utf16-le-char) + + (define-openers utf32 "UTF-32" + (if (host-big-endian?) + sink-utf32-be-char + sink-utf32-le-char)) + (define-openers utf32-be "UTF-32BE" sink-utf32-be-char) + (define-openers utf32-le "UTF-32LE" sink-utf32-le-char) + + unspecific))) + +;;;; Byte buffers + +(define (open-output-byte-buffer) + (let ((bytes #f) + (index)) + (lambda (byte) + (if (eq? byte 'EXTRACT-OUTPUT!) + (without-interrupts + (lambda () + (set-string-maximum-length! bytes index) + (let ((bytes* bytes)) + (set! bytes #f) + bytes*))) + (begin + (cond ((not bytes) + (set! bytes (make-string 128)) + (set! index 0)) + ((not (fix:< index (string-length bytes))) + (let ((n (fix:* (string-length bytes) 2))) + (let ((bytes* (make-string n))) + (string-move! bytes bytes* 0) + (set! bytes bytes*))))) + (vector-8b-set! bytes index byte) + (set! index (fix:+ index 1)) + unspecific))))) + +(define (get-output-bytes buffer) + (buffer 'EXTRACT-OUTPUT!)) + +(define (call-with-output-byte-buffer generator) + (let ((buffer (open-output-byte-buffer))) + (generator buffer) + (get-output-bytes buffer))) \ No newline at end of file