From 22b471d1c221303f4f84ebeab801577e3a51887e Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Fri, 27 Jan 2017 02:31:37 -0800 Subject: [PATCH] Major refactor to use ustring in important places. There is much more work to do but this converts all the textual I/O, parser buffers, pathnames, URIs, and a bunch of the XML code. The older Unicode support in (runtime unicode) is completely gone now. Outside of Edwin, it should be fairly safe to assume that legacy strings are *NOT* UTF-8 encoded. Some specific work items remaining: * Eliminate symbol-name, which violates the non-utf8-legacy rule. * Finish converting the XML code to consistently use ustrings. * Implement real Unicode casing, ordering, and character sets. * Change the parser to use the R7RS-defined character classes. * Isolate Edwin from the runtime system's string implementation, since porting it to Unicode is not worth the trouble. It should be frozen to use only ASCII, not ISO 8859-1 as at present. And last of all: * Once Edwin is isolated, convert the runtime system to use ustrings everywhere, then rename them from "ustring" to "string". --- src/runtime/chrset.scm | 8 +- src/runtime/input.scm | 12 +- src/runtime/keyword.scm | 8 +- src/runtime/numpar.scm | 66 ++-- src/runtime/output.scm | 6 +- src/runtime/packag.scm | 10 +- src/runtime/parser-buffer.scm | 144 +++---- src/runtime/port.scm | 8 +- src/runtime/pp.scm | 43 +- src/runtime/runtime.pkg | 98 ++--- src/runtime/string.scm | 305 ++++----------- src/runtime/stringio.scm | 3 +- src/runtime/swank.scm | 3 +- src/runtime/syncproc.scm | 7 +- src/runtime/unicode.scm | 714 ---------------------------------- src/runtime/unxprm.scm | 6 +- src/runtime/url.scm | 83 ++-- src/runtime/ustring.scm | 46 +-- src/xml/rdf-nt.scm | 4 +- src/xml/rdf-struct.scm | 6 +- src/xml/turtle.scm | 10 +- src/xml/xml-names.scm | 2 +- src/xml/xml-output.scm | 66 ++-- src/xml/xml-parser.scm | 19 +- src/xml/xml-rpc.scm | 3 +- src/xml/xml-struct.scm | 40 +- src/xml/xml.pkg | 4 +- src/xml/xpath.scm | 2 +- 28 files changed, 349 insertions(+), 1377 deletions(-) delete mode 100644 src/runtime/unicode.scm diff --git a/src/runtime/chrset.scm b/src/runtime/chrset.scm index 1b481f10f..945c920d2 100644 --- a/src/runtime/chrset.scm +++ b/src/runtime/chrset.scm @@ -547,16 +547,18 @@ USA. ;;;; Backwards compatibility (define (string->char-set string) - (scalar-values->char-set (map char->integer (ustring->list string)))) + (scalar-values->char-set (map char->integer (string->list string)))) +;; Returns ASCII string: (define (char-set->string char-set) - (list->ustring (char-set-members char-set))) + (list->string (char-set-members char-set))) +;; Returns only ASCII members: (define (char-set-members char-set) (guarantee-char-set char-set 'CHAR-SET-MEMBERS) (let ((low (%char-set-low char-set))) (let loop ((code 0)) - (if (fix:< code #x100) + (if (fix:< code #x80) (if (%low-ref low code) (cons (integer->char code) (loop (fix:+ code 1))) diff --git a/src/runtime/input.scm b/src/runtime/input.scm index 44986b9d2..34f364f08 100644 --- a/src/runtime/input.scm +++ b/src/runtime/input.scm @@ -45,7 +45,7 @@ USA. ((textual-port-operation/peek-char port) port)) (define (input-port/read-string! port string) - (input-port/read-substring! port string 0 (xstring-length string))) + (input-port/read-substring! port string 0 (ustring-length string))) (define (input-port/read-substring! port string start end) (if (< start end) @@ -185,12 +185,12 @@ USA. (guarantee index-fixnum? k 'read-string) (let ((port (optional-input-port port 'read-string))) (if (fix:> k 0) - (let ((string (make-string k))) + (let ((string (make-ustring k))) (let ((n (input-port/read-string! port string))) (cond ((not n) n) - ((fix:> n 0) (if (fix:< n k) (substring string 0 n) string)) + ((fix:> n 0) (if (fix:< n k) (ustring-head string n) string)) (else (eof-object))))) - (make-string 0)))) + (make-ustring 0)))) (define (read #!optional port environment) (parse-object (optional-input-port port 'READ) environment)) @@ -215,10 +215,10 @@ USA. (let ((port (optional-input-port port 'read-string!)) (end (if (default-object? end) - (xstring-length string) + (ustring-length string) (begin (guarantee index-fixnum? end 'read-string!) - (if (not (fix:<= end (xstring-length string))) + (if (not (fix:<= end (ustring-length string))) (error:bad-range-argument end 'read-string!)) end)))) (let ((start diff --git a/src/runtime/keyword.scm b/src/runtime/keyword.scm index ba482b3fc..3307d6a70 100644 --- a/src/runtime/keyword.scm +++ b/src/runtime/keyword.scm @@ -38,15 +38,15 @@ USA. (define-integrable keyword-prefix "#[keyword]") (define (string->keyword string) - (guarantee-string string 'STRING->KEYWORD) - (string->symbol (string-append keyword-prefix string))) + (guarantee ustring? string 'STRING->KEYWORD) + (string->symbol (ustring-append keyword-prefix string))) (define (keyword? object) (and (interned-symbol? object) - (string-prefix? keyword-prefix (symbol-name object)))) + (ustring-prefix? keyword-prefix (symbol->string object)))) (define-guarantee keyword "keyword") (define (keyword->string keyword) (guarantee-keyword keyword 'KEYWORD->STRING) - (string-tail (symbol-name keyword) (string-length keyword-prefix))) \ No newline at end of file + (ustring-tail (symbol->string keyword) (ustring-length keyword-prefix))) \ No newline at end of file diff --git a/src/runtime/numpar.scm b/src/runtime/numpar.scm index 4980a9d28..50454d786 100644 --- a/src/runtime/numpar.scm +++ b/src/runtime/numpar.scm @@ -29,44 +29,32 @@ USA. (declare (usual-integrations)) -(define (string->number string #!optional radix error?) - (if (not (string? string)) - (error:wrong-type-argument string "string" 'STRING->NUMBER)) - (parse-number string 0 (string-length string) radix error? 'STRING->NUMBER)) - -(define (substring->number string start end #!optional radix error?) - (if (not (string? string)) - (error:wrong-type-argument string "string" 'SUBSTRING->NUMBER)) - (if (not (index-fixnum? start)) - (error:wrong-type-argument start "string index" 'SUBSTRING->NUMBER)) - (if (not (index-fixnum? end)) - (error:wrong-type-argument end "string index" 'SUBSTRING->NUMBER)) - (if (not (fix:<= end (string-length string))) - (error:bad-range-argument end 'SUBSTRING->NUMBER)) - (if (not (fix:<= start end)) - (error:bad-range-argument start 'SUBSTRING->NUMBER)) - (parse-number string start end radix error? 'SUBSTRING->NUMBER)) - -(define (parse-number string start end radix error? caller) - (let ((z - (parse-number-1 string start end - (if (default-object? radix) #f radix) - caller))) +(define (string->number string #!optional radix error? start end) + (let* ((caller 'string->number) + (end (fix:end-index end (ustring-length string) caller)) + (start (fix:start-index start end caller)) + (z + (parse-number string start end + (if (default-object? radix) #f radix) + caller))) (if (and (not z) (if (default-object? error?) #f error?)) (error:bad-range-argument string caller)) z)) -(define (parse-number-1 string start end default-radix name) +(define (substring->number string start end #!optional radix error?) + (string->number string radix error? start end)) + +(define (parse-number string start end default-radix name) (if (not (or (eq? #f default-radix) (eq? 2 default-radix) (eq? 8 default-radix) (eq? 10 default-radix) (eq? 16 default-radix))) (error:bad-range-argument default-radix name)) (let loop ((start start) (exactness #f) (radix #f)) (and (fix:< start end) - (if (char=? #\# (string-ref string start)) + (if (char=? #\# (ustring-ref string start)) (let ((start (fix:+ start 1))) (and (fix:< start end) - (let ((char (string-ref string start)) + (let ((char (ustring-ref string start)) (start (fix:+ start 1))) (let ((do-radix (lambda (r) @@ -92,7 +80,7 @@ USA. (define (parse-top-level string start end exactness radix) (and (fix:< start end) - (let ((char (string-ref string start)) + (let ((char (ustring-ref string start)) (start (fix:+ start 1))) (cond ((sign? char) (find-leader string start end @@ -111,7 +99,7 @@ USA. (define (find-leader string start end exactness radix sign) ;; State: leading sign has been seen. (and (fix:< start end) - (let ((char (string-ref string start)) + (let ((char (ustring-ref string start)) (start (fix:+ start 1))) (cond ((char->digit char radix) => (lambda (digit) @@ -131,7 +119,7 @@ USA. (parse-digits string start end integer exactness radix (lambda (start integer exactness sharp?) (if (fix:< start end) - (let ((char (string-ref string start)) + (let ((char (ustring-ref string start)) (start+1 (fix:+ start 1))) (cond ((char=? #\/ char) (parse-denominator-1 string start+1 end @@ -160,7 +148,7 @@ USA. (define (parse-digits string start end integer exactness radix k) (let loop ((start start) (integer integer)) (if (fix:< start end) - (let ((char (string-ref string start))) + (let ((char (ustring-ref string start))) (cond ((char->digit char radix) => (lambda (digit) (loop (fix:+ start 1) @@ -169,7 +157,7 @@ USA. (do ((start (fix:+ start 1) (fix:+ start 1)) (integer (* integer radix) (* integer radix))) ((not (and (fix:< start end) - (char=? #\# (string-ref string start)))) + (char=? #\# (ustring-ref string start)))) (k start integer (or exactness 'IMPLICIT-INEXACT) #t)))) (else (k start integer exactness #f)))) @@ -191,7 +179,7 @@ USA. (define (parse-decimal-1 string start end exactness sign) ;; State: radix is 10, leading dot seen. (and (fix:< start end) - (let ((digit (char->digit (string-ref string start) 10)) + (let ((digit (char->digit (ustring-ref string start) 10)) (start (fix:+ start 1))) (and digit (parse-decimal-2 string start end digit -1 exactness sign))))) @@ -200,7 +188,7 @@ USA. ;; State: radix is 10, dot seen. (let loop ((start start) (integer integer) (exponent exponent)) (if (fix:< start end) - (let ((char (string-ref string start)) + (let ((char (ustring-ref string start)) (start+1 (fix:+ start 1))) (cond ((char->digit char 10) => (lambda (digit) @@ -219,7 +207,7 @@ USA. ;; State: radix is 10, dot and # seen. (let loop ((start start)) (if (fix:< start end) - (let ((char (string-ref string start)) + (let ((char (ustring-ref string start)) (start+1 (fix:+ start 1))) (if (char=? #\# char) (loop start+1) @@ -228,7 +216,7 @@ USA. (finish-real integer exponent exactness sign)))) (define (parse-decimal-4 string start end integer exponent exactness sign) - (if (exponent-marker? (string-ref string start)) + (if (exponent-marker? (ustring-ref string start)) (parse-exponent-1 string (fix:+ start 1) end integer exponent exactness sign) (parse-decimal-5 string start end integer exponent exactness sign))) @@ -237,12 +225,12 @@ USA. ;; State: radix is 10, exponent seen. (define (get-digits start esign) (and (fix:< start end) - (let ((digit (char->digit (string-ref string start) 10))) + (let ((digit (char->digit (ustring-ref string start) 10))) (and digit (let loop ((start (fix:+ start 1)) (eint digit)) (if (fix:< start end) (let ((digit - (char->digit (string-ref string start) 10))) + (char->digit (ustring-ref string start) 10))) (if digit (loop (fix:+ start 1) (+ (* eint 10) digit)) @@ -257,7 +245,7 @@ USA. integer exponent exactness sign)))) (and (fix:< start end) - (let ((esign (string-ref string start))) + (let ((esign (ustring-ref string start))) (if (sign? esign) (get-digits (fix:+ start 1) esign) (get-digits start #f))))) @@ -269,7 +257,7 @@ USA. (define (parse-complex string start end real exactness radix sign) (if (fix:< start end) - (let ((char (string-ref string start)) + (let ((char (ustring-ref string start)) (start+1 (fix:+ start 1)) (exactness (if (eq? 'IMPLICIT-INEXACT exactness) #f exactness))) (cond ((sign? char) diff --git a/src/runtime/output.scm b/src/runtime/output.scm index 708226358..d6ba8286c 100644 --- a/src/runtime/output.scm +++ b/src/runtime/output.scm @@ -35,7 +35,7 @@ USA. ((textual-port-operation/write-char port) port char)) (define (output-port/write-string port string) - (output-port/write-substring port string 0 (xstring-length string))) + (output-port/write-substring port string 0 (ustring-length string))) (define (output-port/write-substring port string start end) ((textual-port-operation/write-substring port) port string start end)) @@ -94,10 +94,10 @@ USA. (let ((port (optional-output-port port 'WRITE-STRING)) (end (if (default-object? end) - (xstring-length string) + (ustring-length string) (begin (guarantee index-fixnum? end 'write-string) - (if (not (fix:<= end (xstring-length string))) + (if (not (fix:<= end (ustring-length string))) (error:bad-range-argument end 'write-string)) end)))) (let ((start diff --git a/src/runtime/packag.scm b/src/runtime/packag.scm index 31f1cd89c..7ac410817 100644 --- a/src/runtime/packag.scm +++ b/src/runtime/packag.scm @@ -100,7 +100,7 @@ USA. package)))) (define-integrable package-name-tag - ((ucode-primitive string->symbol) "#[(package)package-name-tag]")) + '|#[(package)package-name-tag]|) (define (find-package name #!optional error?) (let package-loop ((packages *packages*)) @@ -183,13 +183,13 @@ USA. (let ((p (->pathname pathname))) (pathname-new-type (pathname-new-name p - (string-append + (ustring-append (or (pathname-name p) ;; Interpret dirname/ as dirname/dirname-OS.pkd. (let ((dir (pathname-directory p))) (if (pair? dir) (let ((name (last dir))) - (if (string? name) + (if (ustring? name) name "")) ""))) @@ -270,8 +270,8 @@ USA. (and (pair? clause) (or (eq? (car clause) 'ELSE) (vector-of-type? (car clause) symbol?)) - (vector-of-type? (cdr clause) string?))))) - (vector-of-type? file-case string?)))) + (vector-of-type? (cdr clause) ustring?))))) + (vector-of-type? file-case ustring?)))) (vector? (load-description/initializations object)) (vector? (load-description/finalizations object)))) diff --git a/src/runtime/parser-buffer.scm b/src/runtime/parser-buffer.scm index d7d7d0c2c..743164472 100644 --- a/src/runtime/parser-buffer.scm +++ b/src/runtime/parser-buffer.scm @@ -52,40 +52,21 @@ USA. ;;; that reads from an input port. (define (string->parser-buffer string #!optional start end) - (if (string? string) - (let ((string (string->wide-string string start end))) - (make-parser-buffer string 0 (wide-string-length string) 0 0 #f #t 0)) - (begin - (guarantee wide-string? string 'STRING->PARSER-BUFFER) - (let* ((end - (if (or (default-object? end) (not end)) - (wide-string-length string) - (guarantee-substring-end-index end - (wide-string-length string) - 'STRING->PARSER-BUFFER))) - (start - (if (or (default-object? start) (not start)) - 0 - (guarantee-substring-start-index start end - 'STRING->PARSER-BUFFER)))) - (make-parser-buffer string start end 0 0 #f #t 0))))) - -(define (utf8-string->parser-buffer string #!optional start end) - (let ((ws (utf8-string->wide-string string start end))) - (make-parser-buffer ws 0 (wide-string-length ws) 0 0 #f #t 0))) + (let* ((caller 'string->parser-buffer) + (end (fix:end-index end (ustring-length string) caller)) + (start (fix:start-index start end caller))) + (make-parser-buffer string start end 0 0 #f #t 0))) (define (textual-input-port->parser-buffer port #!optional prefix) (guarantee textual-input-port? port 'textual-input-port->parser-buffer) (if (or (default-object? prefix) (not prefix) - (and (wide-string? prefix) - (zero? (wide-string-length prefix)))) - (make-parser-buffer (make-wide-string min-length) 0 0 0 0 port #f 0) - (begin - (guarantee wide-string? prefix 'textual-input-port->parser-buffer) - (let ((n (wide-string-length prefix))) - (make-parser-buffer (%grow-buffer prefix n (max min-length n)) - 0 n 0 0 port #f 0))))) + (and (ustring? prefix) + (fix:= 0 (ustring-length prefix)))) + (make-parser-buffer (make-ustring min-length) 0 0 0 0 port #f 0) + (let ((n (ustring-length prefix))) + (make-parser-buffer (%grow-buffer prefix n (fix:max min-length n)) + 0 n 0 0 port #f 0)))) (define-integrable min-length 256) @@ -96,11 +77,8 @@ USA. (define (*match-string matcher string #!optional start end) (complete-*match matcher (string->parser-buffer string start end))) -(define (*match-utf8-string matcher string #!optional start end) - (complete-*match matcher (utf8-string->parser-buffer string start end))) - (define (*match-symbol matcher symbol) - (*match-utf8-string matcher (symbol-name symbol))) + (*match-string matcher (symbol->string symbol))) (define (complete-*parse parser buffer) (let ((v (parser buffer))) @@ -111,11 +89,8 @@ USA. (define (*parse-string parser string #!optional start end) (complete-*parse parser (string->parser-buffer string start end))) -(define (*parse-utf8-string parser string #!optional start end) - (complete-*parse parser (utf8-string->parser-buffer string start end))) - (define (*parse-symbol parser symbol) - (*parse-utf8-string parser (symbol-name symbol))) + (*parse-string parser (symbol->string symbol))) (define-structure parser-buffer-pointer (index #f read-only #t) @@ -135,7 +110,7 @@ USA. (set-parser-buffer-line! buffer (parser-buffer-pointer-line p))) (define (get-parser-buffer-tail buffer p) - (call-with-parser-buffer-tail buffer p string->utf8-string)) + (call-with-parser-buffer-tail buffer p ustring-copy)) (define (call-with-parser-buffer-tail buffer p procedure) ;; P must be a buffer pointer previously returned by @@ -181,8 +156,8 @@ USA. ;; characters available, return #F and leave the position unchanged. (and (guarantee-buffer-chars buffer 1) (let ((char - (wide-string-ref (parser-buffer-string buffer) - (parser-buffer-index buffer)))) + (ustring-ref (parser-buffer-string buffer) + (parser-buffer-index buffer)))) (increment-buffer-index! buffer char) char))) @@ -191,15 +166,15 @@ USA. ;; current position. If there is a character available, return it, ;; otherwise return #F. The position is unaffected in either case. (and (guarantee-buffer-chars buffer 1) - (wide-string-ref (parser-buffer-string buffer) - (parser-buffer-index buffer)))) + (ustring-ref (parser-buffer-string buffer) + (parser-buffer-index buffer)))) (define (parser-buffer-ref buffer index) (if (not (index-fixnum? index)) (error:wrong-type-argument index "index" 'PARSER-BUFFER-REF)) (and (guarantee-buffer-chars buffer (fix:+ index 1)) - (wide-string-ref (parser-buffer-string buffer) - (fix:+ (parser-buffer-index buffer) index)))) + (ustring-ref (parser-buffer-string buffer) + (fix:+ (parser-buffer-index buffer) index)))) (define (match-parser-buffer-char buffer char) (match-char buffer char char=?)) @@ -243,8 +218,8 @@ USA. (define-integrable (match-char buffer reference compare) (and (guarantee-buffer-chars buffer 1) (let ((char - (wide-string-ref (parser-buffer-string buffer) - (parser-buffer-index buffer)))) + (ustring-ref (parser-buffer-string buffer) + (parser-buffer-index buffer)))) (and (compare char reference) (begin (increment-buffer-index! buffer char) @@ -252,8 +227,8 @@ USA. (define-integrable (match-char-no-advance buffer reference compare) (and (guarantee-buffer-chars buffer 1) - (compare (wide-string-ref (parser-buffer-string buffer) - (parser-buffer-index buffer)) + (compare (ustring-ref (parser-buffer-string buffer) + (parser-buffer-index buffer)) reference))) (define-integrable (match-char-not buffer reference compare) @@ -281,16 +256,7 @@ USA. (match-string buffer string match-substring-loop-na char-ci=?)) (define-integrable (match-string buffer string loop compare) - (cond ((wide-string? string) - (loop buffer - string 0 (wide-string-length string) - compare wide-string-ref)) - ((string? string) - (loop buffer - string 0 (string-length string) - compare string-ref)) - (else - (error:wrong-type-argument string "string" #f)))) + (loop buffer string 0 (ustring-length string) compare)) (define (match-parser-buffer-substring buffer string start end) (match-substring buffer string start end match-substring-loop char=?)) @@ -305,20 +271,10 @@ USA. (match-substring buffer string start end match-substring-loop-na char-ci=?)) (define-integrable (match-substring buffer string start end loop compare) - (cond ((wide-string? string) - (loop buffer - string start end - compare wide-string-ref)) - ((string? string) - (loop buffer - string start end - compare string-ref)) - (else - (error:wrong-type-argument string "string" #f)))) - + (guarantee ustring? string) + (loop buffer string start end compare)) -(define-integrable (match-substring-loop buffer string start end - compare extract) +(define-integrable (match-substring-loop buffer string start end compare) (and (guarantee-buffer-chars buffer (fix:- end start)) (let ((bs (parser-buffer-string buffer))) (let loop @@ -326,10 +282,10 @@ USA. (bi (parser-buffer-index buffer)) (bl (parser-buffer-line buffer))) (if (fix:< i end) - (and (compare (extract string i) (wide-string-ref bs bi)) + (and (compare (ustring-ref string i) (ustring-ref bs bi)) (loop (fix:+ i 1) (fix:+ bi 1) - (if (char=? (wide-string-ref bs bi) #\newline) + (if (char=? (ustring-ref bs bi) #\newline) (fix:+ bl 1) bl))) (begin @@ -337,13 +293,12 @@ USA. (set-parser-buffer-line! buffer bl) #t)))))) -(define-integrable (match-substring-loop-na buffer string start end - compare extract) +(define-integrable (match-substring-loop-na buffer string start end compare) (and (guarantee-buffer-chars buffer (fix:- end start)) (let ((bs (parser-buffer-string buffer))) (let loop ((i start) (bi (parser-buffer-index buffer))) (if (fix:< i end) - (and (compare (extract string i) (wide-string-ref bs bi)) + (and (compare (ustring-ref string i) (ustring-ref bs bi)) (loop (fix:+ i 1) (fix:+ bi 1))) #t))))) @@ -359,7 +314,7 @@ USA. (let loop ((i i) (n (parser-buffer-line buffer))) (if (fix:< i j) (loop (fix:+ i 1) - (if (char=? (wide-string-ref s i) #\newline) + (if (char=? (ustring-ref s i) #\newline) (fix:+ n 1) n)) (set-parser-buffer-line! buffer n))) @@ -375,23 +330,20 @@ USA. (if (fix:> index 0) (let* ((end* (fix:- end index)) (string* - (let ((n (wide-string-length string))) + (let ((n (ustring-length string))) (if (and (fix:> n min-length) (fix:<= end* (fix:quotient n 4))) - (make-wide-string (fix:quotient n 2)) + (make-ustring (fix:quotient n 2)) string)))) (without-interruption (lambda () - (do ((i index (fix:+ i 1)) - (j 0 (fix:+ j 1))) - ((not (fix:< i end))) - (wide-string-set! string* j (wide-string-ref string i))) + (ustring-copy! string* 0 string index end) (set-parser-buffer-string! buffer string*) (set-parser-buffer-index! buffer 0) (set-parser-buffer-end! buffer end*) (set-parser-buffer-base-offset! buffer - (+ (parser-buffer-base-offset buffer) index))))))) + (fix:+ (parser-buffer-base-offset buffer) index))))))) (set-parser-buffer-start! buffer (parser-buffer-index buffer)))) (define-integrable (guarantee-buffer-chars buffer n) @@ -403,11 +355,11 @@ USA. ;; Don't read more characters than are needed. The XML parser ;; depends on this when doing its character-code detection. (and (not (parser-buffer-at-end? buffer)) - (let ((min-end (+ (parser-buffer-index buffer) n)) + (let ((min-end (fix:+ (parser-buffer-index buffer) n)) (end (parser-buffer-end buffer))) - ;; (assert (> min-end end)) + ;; (assert (fix:> min-end end)) (let ((string (parser-buffer-string buffer))) - (if (> min-end (wide-string-length string)) + (if (fix:> min-end (ustring-length string)) (set-parser-buffer-string! buffer (%grow-buffer string end min-end)))) (let ((port (parser-buffer-port buffer)) @@ -415,12 +367,12 @@ USA. (with-input-port-blocking-mode port 'BLOCKING (lambda () (let loop ((end end)) - (if (< end min-end) + (if (fix:< end min-end) (let ((n-read (input-port/read-substring! port string end min-end))) - (if (> n-read 0) - (let ((end (+ end n-read))) + (if (fix:> n-read 0) + (let ((end (fix:+ end n-read))) (set-parser-buffer-end! buffer end) (loop end)) (begin @@ -430,12 +382,10 @@ USA. (define (%grow-buffer string end min-length) (let ((new-string - (make-wide-string - (let loop ((n (wide-string-length string))) - (if (<= min-length n) + (make-ustring + (let loop ((n (ustring-length string))) + (if (fix:<= min-length n) n - (loop (* n 2))))))) - (do ((i 0 (+ i 1))) - ((not (< i end))) - (wide-string-set! new-string i (wide-string-ref string i))) + (loop (fix:* n 2))))))) + (ustring-copy! new-string 0 string 0 end) new-string)) \ No newline at end of file diff --git a/src/runtime/port.scm b/src/runtime/port.scm index c43238bac..b86ef9bc5 100644 --- a/src/runtime/port.scm +++ b/src/runtime/port.scm @@ -229,7 +229,7 @@ USA. (cond ((not char) #f) ((eof-object? char) 0) (else - (xstring-set! string start char) + (ustring-set! string start char) (let loop ((index (+ start 1))) (if (and (< index end) (char-ready? port)) @@ -237,7 +237,7 @@ USA. (if (or (not char) (eof-object? char)) (- index start) (begin - (xstring-set! string index char) + (ustring-set! string index char) (loop (+ index 1))))) (- index start)))))))) @@ -267,7 +267,7 @@ USA. (let ((write-char (textual-port-operation/write-char port))) (let loop ((i start)) (if (< i end) - (let ((n (write-char port (xstring-ref string i)))) + (let ((n (write-char port (ustring-ref string i)))) (cond ((not n) (and (> i start) (- i start))) ((> n 0) (loop (+ i 1))) (else (- i start)))) @@ -343,7 +343,7 @@ USA. (if (and n (> n 0)) (let ((end (+ start n))) (set-textual-port-previous! port - (xstring-ref string (- end 1))) + (ustring-ref string (- end 1))) (transcribe-substring string start end port))) n)))) (flush-output diff --git a/src/runtime/pp.scm b/src/runtime/pp.scm index c2975e617..a442d9966 100644 --- a/src/runtime/pp.scm +++ b/src/runtime/pp.scm @@ -281,7 +281,7 @@ USA. (define (with-highlight-strings-printed pph thunk) (let ((print-string (lambda (s) - (if (string? s) + (if (ustring? s) (*unparse-string s) (s (output-port)))))) (print-string (pph/start-string pph)) @@ -290,14 +290,14 @@ USA. (define (pph/start-string-length pph) (let ((start (pph/start-string pph))) - (if (string? start) - (string-length start) + (if (ustring? start) + (ustring-length start) 0))) (define (pph/end-string-length pph) (let ((end (pph/end-string pph))) - (if (string? end) - (string-length end) + (if (ustring? end) + (ustring-length end) 0))) (define (pp-top-level expression port as-code? indentation list-depth) @@ -318,7 +318,7 @@ USA. numerical-walk)) (node (numerical-walk expression list-depth))) (if (positive? indentation) - (*unparse-string (make-string indentation #\space))) + (*unparse-string (make-ustring indentation #\space))) (if as-code? (print-node node indentation list-depth) (print-non-code-node node indentation list-depth)) @@ -378,7 +378,7 @@ USA. ((prefix-node? node) (*unparse-string (prefix-node-prefix node)) (let ((new-column - (+ column (string-length (prefix-node-prefix node)))) + (+ column (ustring-length (prefix-node-prefix node)))) (subnode (prefix-node-subnode node))) (if (null? (dispatch-list)) (print-node subnode new-column depth) @@ -717,7 +717,7 @@ USA. (pad-with-spaces column)) (define-integrable (pad-with-spaces n-spaces) - (*unparse-string (make-string n-spaces #\space))) + (*unparse-string (make-ustring n-spaces #\space))) ;;;; Numerical Walk @@ -975,7 +975,7 @@ USA. (update-queue (cdr half-pointer/queue) '(CDR))))) (if (eq? (car half-pointer/queue) (cdr pair)) (make-singleton-list-node - (string-append + (ustring-append ". " (circularity-string (cdr half-pointer/queue)))) (loop (cdr pair) list-breadth half-pointer/queue))) @@ -1201,18 +1201,19 @@ USA. (define (circularity-string queue) (let ((depth (queue-depth queue)) (cdrs (queue/past-cdrs queue))) - (string-append + (ustring-append (cond ((= depth 1) "#[circularity (current parenthetical level") ((= depth 2) "#[circularity (up 1 parenthetical level") (else - (string-append "#[circularity (up " - (number->string (-1+ depth)) - " parenthetical levels"))) + (ustring-append "#[circularity (up " + (number->string (-1+ depth)) + " parenthetical levels"))) (cond ((= cdrs 0) ")]") ((= cdrs 1) ", downstream 1 cdr.)]") (else - (string-append ", downstream " - (number->string cdrs) " cdrs.)]")))))) + (ustring-append ", downstream " + (number->string cdrs) + " cdrs.)]")))))) ;;;; Node Model @@ -1224,7 +1225,7 @@ USA. ;;; be gained by keeping it around. (define (symbol-length symbol) - (string-length + (ustring-length (call-with-output-string (lambda (port) (write symbol port))))) @@ -1240,13 +1241,13 @@ USA. (subnode #f read-only #t)) (define (make-prefix-node prefix subnode) - (cond ((string? subnode) - (string-append prefix subnode)) + (cond ((ustring? subnode) + (ustring-append prefix subnode)) ((prefix-node? subnode) - (make-prefix-node (string-append prefix (prefix-node-prefix subnode)) + (make-prefix-node (ustring-append prefix (prefix-node-prefix subnode)) (prefix-node-subnode subnode))) (else - (%make-prefix-node (+ (string-length prefix) (node-size subnode)) + (%make-prefix-node (+ (ustring-length prefix) (node-size subnode)) prefix subnode)))) @@ -1274,7 +1275,7 @@ USA. ((prefix-node? node) (prefix-node-size node)) ((highlighted-node? node) (highlighted-node/size node)) - (else (string-length node)))) + (else (ustring-length node)))) (define-structure (highlighted-node (conc-name highlighted-node/) diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index b695b547e..e9b620cd9 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -1005,10 +1005,8 @@ USA. (vector-8b-maximum-length string-maximum-length) (vector-8b? string?) error:not-string - error:not-xstring guarantee-string guarantee-string-index - guarantee-xstring hexadecimal->vector-8b make-vector-8b vector-8b->hexadecimal @@ -1068,6 +1066,8 @@ USA. string-hash-mod string-head string-head! + string-joiner + string-joiner* string-length string-lower-case? string-map @@ -1089,6 +1089,7 @@ USA. string-search-backward string-search-forward string-set! + string-splitter string-suffix-ci? string-suffix? string-tail @@ -1139,43 +1140,25 @@ USA. substring-upper-case? substringustring + make-ustring string-for-primitive ;export to (runtime) after 9.3 ustring ustring* ustring->ascii ustring->list - ustring->utf8-string ;temporary scaffolding ustring->vector ustring-any ustring-append + ustring-append* ustring-ascii? ustring-capitalize ustring-ci<=? @@ -1214,7 +1197,6 @@ USA. ustring>=? ustring>? ustring? - utf8-string->ustring ;temporary scaffolding ;; vector->ustring ) (export (runtime predicate-metadata) @@ -1412,9 +1394,6 @@ USA. unicode-char->scalar-value unicode-char? unicode-scalar-value?) - (export (runtime unicode) - legal-code-16? - legal-code-32?) (initialization (initialize-package!))) (define-package (runtime character-set) @@ -1422,10 +1401,22 @@ USA. (parent (runtime)) (export () ;; BEGIN deprecated bindings + (8-bit-alphabet? 8-bit-char-set?) + (alphabet char-set) + (alphabet+ char-set-union) + (alphabet- char-set-difference) + (alphabet->string char-set->string) + (alphabet-predicate char-set-predicate) + (alphabet? char-set?) (error:not-8-bit-alphabet error:not-8-bit-char-set) (error:not-alphabet error:not-char-set) (guarantee-8-bit-alphabet guarantee-8-bit-char-set) (guarantee-alphabet guarantee-char-set) + (string->alphabet string->char-set) + alphabet->char-set + alphabet->scalar-values + char-in-alphabet? + char-set->alphabet error:not-8-bit-char-set error:not-char-set error:not-well-formed-scalar-value-list @@ -1434,30 +1425,17 @@ USA. guarantee-char-set guarantee-well-formed-scalar-value-list guarantee-well-formed-scalar-value-range + scalar-values->alphabet ;; END deprecated bindings - (8-bit-alphabet? 8-bit-char-set?) - ( ) - (alphabet char-set) - (alphabet+ char-set-union) - (alphabet- char-set-difference) - (alphabet->string char-set->string) - (alphabet-predicate char-set-predicate) - (alphabet? char-set?) - (string->alphabet string->char-set) 8-bit-char-set? - - alphabet->char-set - alphabet->scalar-values ascii-range->char-set char-alphabetic? char-alphanumeric? char-ctl? char-graphic? - char-in-alphabet? char-lower-case? char-numeric? char-set - char-set->alphabet char-set->scalar-values char-set-difference char-set-intersection @@ -1494,7 +1472,6 @@ USA. char-wsp? chars->char-set scalar-value-in-char-set? - scalar-values->alphabet scalar-values->char-set string->char-set well-formed-scalar-value-list? @@ -4687,8 +4664,7 @@ USA. get-output-string get-output-string! open-input-string - open-output-string) - (initialization (initialize-package!))) + open-output-string)) (define-package (runtime syntax) (files) @@ -5661,7 +5637,7 @@ USA. (files "parser-buffer") (parent (runtime)) (export () - ;; Deprecated: + ;; BEGIN deprecated bindings (input-port->parser-buffer textual-input-port->parser-buffer) (match-parser-buffer-char-in-alphabet match-parser-buffer-char-in-set) (match-parser-buffer-char-in-alphabet-no-advance @@ -5671,12 +5647,11 @@ USA. (match-parser-buffer-char-not-in-alphabet-no-advance match-parser-buffer-char-not-in-set-no-advance) (match-utf8-char-in-alphabet match-parser-buffer-char-in-set) + ;; END deprecated bindings *match-string *match-symbol - *match-utf8-string *parse-string *parse-symbol - *parse-utf8-string call-with-parser-buffer-tail complete-*match complete-*parse @@ -5716,34 +5691,7 @@ USA. read-parser-buffer-char set-parser-buffer-pointer! string->parser-buffer - textual-input-port->parser-buffer - utf8-string->parser-buffer)) - -(define-package (runtime unicode) - (files "unicode") - (parent (runtime)) - (export () - call-with-utf8-input-string - call-with-utf8-output-string - for-all-chars-in-string? - make-wide-string - open-utf8-input-string - open-utf8-output-string - string->utf8-string - string->wide-string - utf8-string->string - utf8-string->wide-string - utf8-string-length - utf8-string-valid? - utf8-string? - wide-string - wide-string->string - wide-string-index? - wide-string-length - wide-string-ref - wide-string-set! - wide-string? - wide-substring)) + textual-input-port->parser-buffer)) (define-package (runtime uri) (files "url") diff --git a/src/runtime/string.scm b/src/runtime/string.scm index ed22ff271..442474c74 100644 --- a/src/runtime/string.scm +++ b/src/runtime/string.scm @@ -266,24 +266,12 @@ USA. (define (string . objects) (%string-append (map ->string objects))) -(define (utf8-string . objects) - (%string-append (map ->utf8-string objects))) - (define (->string object) (cond ((string? object) object) ((symbol? object) (symbol->string object)) - ((wide-string? object) (wide-string->string object)) ((8-bit-char? object) (make-string 1 object)) (else (%->string object 'STRING)))) -(define (->utf8-string object) - (cond ((string? object) (string->utf8-string object)) - ((symbol? object) (symbol-name object)) - ((wide-string? object) (string->utf8-string object)) - ((unicode-char? object) - (string->utf8-string (wide-string object))) - (else (%->string object 'UTF8-STRING)))) - (define (%->string object caller) (cond ((not object) "") ((number? object) (number->string object)) @@ -417,10 +405,14 @@ USA. (VECTOR-8B-SET! STRING2 START2 CODE) ,(let loop ((i 1)) (if (< i n) - `(LET ((CODE (VECTOR-8B-REF STRING1 (FIX:+ START1 ,i)))) + `(LET ((CODE + (VECTOR-8B-REF STRING1 + (FIX:+ START1 ,i)))) (AND (FIX:< CODE #x80) (BEGIN - (VECTOR-8B-SET! STRING2 (FIX:+ START2 ,i) CODE) + (VECTOR-8B-SET! STRING2 + (FIX:+ START2 ,i) + CODE) ,(loop (+ i 1))))) 'STRING2))))))))) (unrolled-move-right @@ -434,10 +426,14 @@ USA. (VECTOR-8B-SET! STRING2 (FIX:+ START2 ,(- n 1)) CODE) ,(let loop ((i (- n 1))) (if (> i 0) - `(LET ((CODE (VECTOR-8B-REF STRING1 (FIX:+ START1 ,(- i 1))))) + `(LET ((CODE + (VECTOR-8B-REF STRING1 + (FIX:+ START1 ,(- i 1))))) (AND (FIX:< CODE #x80) (BEGIN - (VECTOR-8B-SET! STRING2 (FIX:+ START2 ,(- i 1)) CODE) + (VECTOR-8B-SET! STRING2 + (FIX:+ START2 ,(- i 1)) + CODE) ,(loop (- i 1))))) 'STRING2)))))))))) (let ((n (fix:- end1 start1))) @@ -491,89 +487,6 @@ USA. (loop (cdr strings) (fix:+ index size))) result)))) -(define (decorated-string-append prefix infix suffix strings) - (guarantee-string prefix 'DECORATED-STRING-APPEND) - (guarantee-string infix 'DECORATED-STRING-APPEND) - (guarantee-string suffix 'DECORATED-STRING-APPEND) - (%decorated-string-append prefix infix suffix strings - 'DECORATED-STRING-APPEND)) - -(define (%decorated-string-append prefix infix suffix strings procedure) - (if (pair? strings) - (let ((np (string-length prefix)) - (ni (string-length infix)) - (ns (string-length suffix))) - (guarantee-string (car strings) procedure) - (let ((string - (make-string - (let ((ni* (fix:+ np (fix:+ ni ns)))) - (do ((strings (cdr strings) (cdr strings)) - (count (fix:+ np (string-length (car strings))) - (fix:+ count - (fix:+ ni* - (string-length (car strings)))))) - ((not (pair? strings)) - (fix:+ count ns)) - (guarantee-string (car strings) procedure)))))) - (let ((mp - (lambda (index) - (%substring-move! prefix 0 np string index))) - (mi - (lambda (index) - (%substring-move! infix 0 ni string index))) - (ms - (lambda (index) - (%substring-move! suffix 0 ns string index))) - (mv - (lambda (s index) - (%substring-move! s 0 (string-length s) string index)))) - (let loop - ((strings (cdr strings)) - (index (mv (car strings) (mp 0)))) - (if (pair? strings) - (loop (cdr strings) - (mv (car strings) (mp (mi (ms index))))) - (ms index)))) - string)) - (make-string 0))) - -(define (burst-string string delimiter allow-runs?) - (guarantee-string string 'BURST-STRING) - (let ((end (string-length string))) - (cond ((char? delimiter) - (let loop ((start 0) (index 0) (result '())) - (cond ((fix:= index end) - (reverse! - (if (and allow-runs? (fix:= start index)) - result - (cons (%substring string start index) result)))) - ((char=? delimiter (string-ref string index)) - (loop (fix:+ index 1) - (fix:+ index 1) - (if (and allow-runs? (fix:= start index)) - result - (cons (%substring string start index) result)))) - (else - (loop start (fix:+ index 1) result))))) - ((char-set? delimiter) - (let loop ((start 0) (index 0) (result '())) - (cond ((fix:= index end) - (reverse! - (if (and allow-runs? (fix:= start index)) - result - (cons (%substring string start index) result)))) - ((char-set-member? delimiter (string-ref string index)) - (loop (fix:+ index 1) - (fix:+ index 1) - (if (and allow-runs? (fix:= start index)) - result - (cons (%substring string start index) result)))) - (else - (loop start (fix:+ index 1) result))))) - (else - (error:wrong-type-argument delimiter "character or character set" - 'BURST-STRING))))) - (define (reverse-string string) (guarantee-string string 'REVERSE-STRING) (%reverse-substring string 0 (string-length string))) @@ -608,6 +521,75 @@ USA. (string-set! string j (string-ref string i)) (string-set! string i char))))) +(define (decorated-string-append prefix infix suffix strings) + ((string-joiner* prefix infix suffix) strings)) + +(define (string-joiner infix #!optional prefix suffix) + (let ((joiner (string-joiner* prefix infix suffix))) + (lambda strings + (joiner strings)))) + +(define (string-joiner* infix #!optional prefix suffix) + (let ((prefix (if (default-object? prefix) "" prefix)) + (suffix (if (default-object? suffix) "" suffix))) + (let ((infix (ustring-append suffix infix prefix))) + + (lambda (strings) + (ustring-append* + (if (pair? strings) + (cons* prefix + (car strings) + (let loop ((strings (cdr strings))) + (if (pair? strings) + (cons* infix + (car strings) + (loop (cdr strings))) + (list suffix)))) + '())))))) + +(define (burst-string string delimiter allow-runs?) + ((string-splitter delimiter allow-runs?) string)) + +(define (string-splitter delimiter #!optional allow-runs?) + (let ((predicate (splitter-delimiter->predicate delimiter)) + (allow-runs? (if (default-object? allow-runs?) #t allow-runs?))) + + (lambda (string #!optional start end) + (let* ((end (fix:end-index end (ustring-length string) 'string-splitter)) + (start (fix:start-index start end 'string-splitter))) + + (define (find-start start) + (if allow-runs? + (let loop ((index start)) + (if (fix:< index end) + (if (predicate (ustring-ref string index)) + (loop (fix:+ index 1)) + (find-end index (fix:+ index 1))) + '())) + (find-end start start))) + + (define (find-end start index) + (let loop ((index index)) + (if (fix:< index end) + (if (predicate (ustring-ref string index)) + (cons (ustring-copy string start index) + (find-start (fix:+ index 1))) + (loop (fix:+ index 1))) + (list (ustring-copy string start end))))) + + (find-start start))))) + +(define (splitter-delimiter->predicate delimiter) + (cond ((char? delimiter) (char=-predicate delimiter)) + ((char-set? delimiter) (char-set-predicate delimiter)) + ((unary-procedure? delimiter) delimiter) + (else (error:not-a splitter-delimiter? delimiter 'string-splitter)))) + +(define (splitter-delimiter? object) + (or (char? object) + (char-set? object) + (unary-procedure? object))) + (define (vector-8b->hexadecimal bytes) (define-integrable (hex-char k) (string-ref "0123456789abcdef" (fix:and k #x0F))) @@ -829,7 +811,7 @@ USA. (define (lisp-string->camel-case string #!optional upcase-initial?) (call-with-input-string string (lambda (input) - (call-narrow-output-string + (call-with-output-string (lambda (output) (let loop ((upcase? @@ -1642,97 +1624,6 @@ USA. (outer k (fix:+ q 1))))) pi)) -(define (xstring? object) - (or (string? object) - (wide-string? object))) - -(define (xstring-length string) - (cond ((string? string) (string-length string)) - ((wide-string? string) (wide-string-length string)) - (else (error:not-xstring string 'XSTRING-LENGTH)))) - -(define (xstring-ref string index) - (cond ((string? string) (string-ref string index)) - ((wide-string? string) (wide-string-ref string index)) - (else (error:not-xstring string 'XSTRING-REF)))) - -(define (xstring-set! string index char) - (cond ((string? string) (string-set! string index char)) - ((wide-string? string) (wide-string-set! string index char)) - (else (error:not-xstring string 'XSTRING-SET!)))) - -(define (xstring-move! xstring1 xstring2 start2) - (xsubstring-move! xstring1 0 (xstring-length xstring1) xstring2 start2)) - -(define (xsubstring-move! xstring1 start1 end1 xstring2 start2) - (cond ((or (not (eq? xstring2 xstring1)) (< start2 start1)) - (substring-move-left! xstring1 start1 end1 - xstring2 start2)) - ((> start2 start1) - (substring-move-right! xstring1 start1 end1 - xstring2 start2)))) - -(define (xsubstring xstring start end) - (guarantee-xsubstring xstring start end 'XSUBSTRING) - (let ((string (make-string (- end start)))) - (xsubstring-move! xstring start end string 0) - string)) - -(define (xstring-fill! xstring char) - (cond ((string? xstring) - (string-fill! xstring char)) - (else - (error:not-xstring xstring 'XSTRING-FILL!)))) - -(define (xsubstring-fill! xstring start end char) - (cond ((string? xstring) - (substring-fill! xstring start end char)) - (else - (error:not-xstring xstring 'XSTRING-FILL!)))) - -(define-integrable (xsubstring-find-char xstring start end datum finder caller) - (cond ((string? xstring) - (guarantee-substring xstring start end caller) - (finder xstring start end datum)) - (else - (error:not-xstring xstring caller)))) - -(define (xsubstring-find-next-char xstring start end char) - (guarantee-char char 'XSUBSTRING-FIND-NEXT-CHAR) - (xsubstring-find-char xstring start end (char->ascii char) - (ucode-primitive VECTOR-8B-FIND-NEXT-CHAR) - 'XSUBSTRING-FIND-NEXT-CHAR)) - -(define (xsubstring-find-next-char-ci xstring start end char) - (guarantee-char char 'XSUBSTRING-FIND-NEXT-CHAR-CI) - (xsubstring-find-char xstring start end (char->ascii char) - (ucode-primitive VECTOR-8B-FIND-NEXT-CHAR-CI) - 'XSUBSTRING-FIND-NEXT-CHAR-CI)) - -(define (xsubstring-find-next-char-in-set xstring start end char-set) - (guarantee-char-set char-set 'XSUBSTRING-FIND-NEXT-CHAR-IN-SET) - (xsubstring-find-char xstring start end (char-set-table char-set) - (ucode-primitive SUBSTRING-FIND-NEXT-CHAR-IN-SET) - 'XSUBSTRING-FIND-NEXT-CHAR-IN-SET)) - -(define (xsubstring-find-previous-char xstring start end char) - (guarantee-char char 'XSUBSTRING-FIND-PREVIOUS-CHAR) - (xsubstring-find-char xstring start end (char->ascii char) - (ucode-primitive VECTOR-8B-FIND-PREVIOUS-CHAR) - 'XSUBSTRING-FIND-PREVIOUS-CHAR)) - -(define (xsubstring-find-previous-char-ci xstring start end char) - (guarantee-char char 'XSUBSTRING-FIND-PREVIOUS-CHAR-CI) - (xsubstring-find-char xstring start end (char->ascii char) - (ucode-primitive VECTOR-8B-FIND-PREVIOUS-CHAR-CI) - 'XSUBSTRING-FIND-PREVIOUS-CHAR-CI)) - -(define (xsubstring-find-previous-char-in-set xstring start end char-set) - (guarantee-char-set char-set 'XSUBSTRING-FIND-PREVIOUS-CHAR-IN-SET) - (xsubstring-find-char xstring start end (char-set-table char-set) - (ucode-primitive SUBSTRING-FIND-PREVIOUS-CHAR-IN-SET) - 'XSUBSTRING-FIND-PREVIOUS-CHAR-IN-SET)) - ;;;; Guarantors ;; ;; The guarantors are integrated. Most are structured as combination of @@ -1741,9 +1632,8 @@ USA. ;; meaningful message. Structuring the code this way significantly ;; reduces code bloat from large integrated procedures. -(declare (integrate-operator guarantee-string guarantee-xstring)) +(declare (integrate-operator guarantee-string)) (define-guarantee string "string") -(define-guarantee xstring "xstring") (define-integrable (guarantee-2-strings object1 object2 procedure) (if (not (and (string? object1) (string? object2))) @@ -1759,10 +1649,6 @@ USA. (if (not (index-fixnum? object)) (error:wrong-type-argument object "string index" caller))) -(define-integrable (guarantee-xstring-index object caller) - (if (not (exact-nonnegative-integer? object)) - (error:wrong-type-argument object "xstring index" caller))) - (define-integrable (guarantee-substring string start end caller) (if (not (and (string? string) (index-fixnum? start) @@ -1775,19 +1661,6 @@ USA. (guarantee-string string caller) (guarantee-substring-end-index end (string-length string) caller) (guarantee-substring-start-index start end caller)) - -(define-integrable (guarantee-xsubstring xstring start end caller) - (if (not (and (xstring? xstring) - (exact-nonnegative-integer? start) - (exact-nonnegative-integer? end) - (<= start end) - (<= end (xstring-length xstring)))) - (guarantee-xsubstring/fail xstring start end caller))) - -(define (guarantee-xsubstring/fail xstring start end caller) - (guarantee-xstring xstring caller) - (guarantee-xsubstring-end-index end (xstring-length xstring) caller) - (guarantee-xsubstring-start-index start end caller)) (define-integrable (guarantee-substring-end-index end length caller) (guarantee-string-index end caller) @@ -1801,18 +1674,6 @@ USA. (error:bad-range-argument start caller)) start) -(define-integrable (guarantee-xsubstring-end-index end length caller) - (guarantee-xstring-index end caller) - (if (not (<= end length)) - (error:bad-range-argument end caller)) - end) - -(define-integrable (guarantee-xsubstring-start-index start end caller) - (guarantee-xstring-index start caller) - (if (not (<= start end)) - (error:bad-range-argument start caller)) - start) - (define-integrable (guarantee-2-substrings string1 start1 end1 string2 start2 end2 procedure) diff --git a/src/runtime/stringio.scm b/src/runtime/stringio.scm index 16fca5ed3..82f355b66 100644 --- a/src/runtime/stringio.scm +++ b/src/runtime/stringio.scm @@ -133,8 +133,7 @@ USA. (j start* (fix:+ j 1))) ((not (fix:< i limit)) (set! index i)) - (bytevector-u8-set! bv j - (char->ascii (ustring-ref string i))))) + (bytevector-u8-set! bv j (char->ascii (ustring-ref string i))))) n))))) (define (make-octets-input-type) diff --git a/src/runtime/swank.scm b/src/runtime/swank.scm index 5ef162f77..7a1b39ed5 100644 --- a/src/runtime/swank.scm +++ b/src/runtime/swank.scm @@ -435,7 +435,7 @@ USA. (define (get-object-type-name obj) (cond ((boolean? obj) "boolean") - ((string? obj) "string") + ((ustring? obj) "string") ((char? obj) "char") ((fixnum? obj) "fixnum") ((integer? obj) "integer") @@ -455,7 +455,6 @@ USA. ((symbol? obj) "symbol") ((weak-pair? obj) "weak-pair") ((record-type? obj) "record-type") - ((wide-string? obj) "wide-string") (else (user-object-type obj)))) ;;;; Miscellaneous diff --git a/src/runtime/syncproc.scm b/src/runtime/syncproc.scm index 34290ab33..171378fda 100644 --- a/src/runtime/syncproc.scm +++ b/src/runtime/syncproc.scm @@ -199,12 +199,11 @@ USA. (if nonblock? (set-output-port-blocking-mode! port 'nonblocking)) (receiver - (let ((buffer (make-wide-string bsize))) + (let ((buffer (make-ustring bsize))) (lambda () (with-input-port-blocking-mode process-input 'BLOCKING (lambda () - (let ((n - (input-port/read-string! process-input buffer))) + (let ((n (input-port/read-string! process-input buffer))) (if n (if (fix:> n 0) (output-port/write-substring port buffer 0 n) @@ -236,7 +235,7 @@ USA. (let ((input-port/open? (port/operation port 'INPUT-OPEN?)) (input-port/close (port/operation port 'CLOSE-INPUT))) (if process-output - (let ((buffer (make-wide-string bsize))) + (let ((buffer (make-ustring bsize))) (let ((copy-output (lambda () (let ((n (input-port/read-string! port buffer))) diff --git a/src/runtime/unicode.scm b/src/runtime/unicode.scm deleted file mode 100644 index f89480a46..000000000 --- a/src/runtime/unicode.scm +++ /dev/null @@ -1,714 +0,0 @@ -#| -*-Scheme-*- - -Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, - 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, - 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, - 2017 Massachusetts Institute of Technology - -This file is part of MIT/GNU Scheme. - -MIT/GNU Scheme is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or (at -your option) any later version. - -MIT/GNU Scheme is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with MIT/GNU Scheme; if not, write to the Free Software -Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, -USA. - -|# - -;;;; Unicode support -;;; package: (runtime unicode) - -;;; See "http://www.cl.cam.ac.uk/~mgk25/unicode.html". -;;; -;;; UTF-8 encoding -;;; ============== -;;; -;;; max code encoding -;;; ---------- ----------------------------------------------------- -;;; #x00000080 0xxxxxxx -;;; #x00000800 110xxxxx 10xxxxxx -;;; #x00010000 1110xxxx 10xxxxxx 10xxxxxx -;;; #x00200000 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx -;;; #x04000000 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx -;;; #x80000000 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx -;;; -;;; It is possible to represent codes with over-long sequences, but -;;; this is disallowed. For example, #\A is normally represented as -;;; #x41, but could also be written as #xC1 #x81, or even longer -;;; sequences. -;;; -;;; UTF-16 encoding -;;; =============== -;;; -;;; Codes in the ranges #x0000 through #xD7FF and #xE000 through -;;; #xFFFD are represented as themselves. Codes in the range #x10000 -;;; through #xFFFFF are represented as a pair: -;;; -;;; 110110xxxxxxxxxx 110111xxxxxxxxxx -;;; -;;; where the first 16-bit word contains the MS 10 bits, and the -;;; second contains the LS 10 bits. As for UTF-8, overlong sequences -;;; are disallowed. -;;; -;;; Some UTF-16 documents start with the code #xFEFF, to identify the -;;; endianness of the document. If instead #xFFFE is encountered, the -;;; opposite endianness should be used. - -(declare (usual-integrations)) - -(define-syntax with-substring-args - (sc-macro-transformer - (lambda (form environment) - (if (syntax-match? '(expression expression expression expression - + expression) - (cdr form)) - (let ((string (close-syntax (list-ref form 1) environment)) - (start (close-syntax (list-ref form 2) environment)) - (end (close-syntax (list-ref form 3) environment)) - (caller (close-syntax (list-ref form 4) environment))) - `(BEGIN - (GUARANTEE-STRING ,string ,caller) - (LET* ((,(list-ref form 3) - (IF (IF (DEFAULT-OBJECT? ,end) #F ,end) - (GUARANTEE-LIMITED-INDEX ,end (STRING-LENGTH ,string) - ,caller) - (STRING-LENGTH ,string))) - (,(list-ref form 2) - (IF (IF (DEFAULT-OBJECT? ,start) #F ,start) - (GUARANTEE-LIMITED-INDEX ,start ,(list-ref form 3) - ,caller) - 0))) - ,@(map (let ((excludes - (list (list-ref form 2) (list-ref form 3)))) - (lambda (expr) - (make-syntactic-closure environment excludes expr))) - (list-tail form 5))))) - (ill-formed-syntax form))))) - -(define (guarantee-limited-index index limit caller) - (guarantee-index-fixnum index caller) - (if (not (fix:<= index limit)) - (error:bad-range-argument index caller)) - index) - -(define (encoded-string-length string start end type caller validate-char) - (let loop ((start start) (n 0)) - (if (fix:< start end) - (let ((start* (validate-char string start end))) - (if (not start*) - (error:wrong-type-argument string - (string-append "a UTF-" - type - " string") - caller)) - (loop start* (fix:+ n 1))) - n))) - -(define (encoded-string-valid? string start end validate-char) - (let loop ((start start)) - (if (fix:< start end) - (let ((start* (validate-char string start end))) - (if start* - (loop start*) - #f)) - #t))) - -(define (coded-input-opener coding) - (lambda (string #!optional start end) - (let ((port (open-input-octets string start end))) - (port/set-coding port coding) - (port/set-line-ending port 'NEWLINE) - port))) - -(define (coded-output-opener coding) - (lambda () - (let ((port (open-output-octets))) - (port/set-coding port coding) - (port/set-line-ending port 'NEWLINE) - port))) - -(define (ended-input-opener be le) - (lambda (string #!optional start end) - (if (host-big-endian?) - (be string start end) - (le string start end)))) - -(define (ended-output-opener be le) - (lambda () - (if (host-big-endian?) - (be) - (le)))) - -(define (input-string-caller open-input) - (lambda (string procedure) - (let ((port (open-input string))) - (let ((value (procedure port))) - (close-input-port port) - value)))) - -(define (output-string-caller open-output) - (lambda (procedure) - (let ((port (open-output))) - (procedure port) - (get-output-string! port)))) - -;;;; Unicode strings - -(define-structure (wide-string (type-descriptor ) - (constructor %make-wide-string)) - (contents #f read-only #t)) - -(define-guarantee wide-string "a Unicode string") - -(define (make-wide-string length #!optional char) - (%make-wide-string - (make-vector length - (if (if (default-object? char) #f char) - (begin - (guarantee-unicode-char char 'MAKE-WIDE-STRING) - char) - (integer->char 0))))) - -(define (wide-string . chars) - (for-each (lambda (char) (guarantee-unicode-char char 'WIDE-STRING)) chars) - (%make-wide-string (list->vector chars))) - -(define (wide-string-length string) - (guarantee wide-string? string 'WIDE-STRING-LENGTH) - (%wide-string-length string)) - -(define-integrable (%wide-string-length string) - (vector-length (wide-string-contents string))) - -(define (wide-string-ref string index) - (guarantee wide-string? string 'WIDE-STRING-REF) - (guarantee-wide-string-index index string 'WIDE-STRING-REF) - (%wide-string-ref string index)) - -(define-integrable (%wide-string-ref string index) - (vector-ref (wide-string-contents string) index)) - -(define (wide-string-set! string index char) - (guarantee wide-string? string 'WIDE-STRING-SET!) - (guarantee-wide-string-index index string 'WIDE-STRING-SET!) - (guarantee-unicode-char char 'WIDE-STRING-SET!) - (%wide-string-set! string index char)) - -(define-integrable (%wide-string-set! string index char) - (vector-set! (wide-string-contents string) index char)) - -(define (wide-substring string start end) - (guarantee-wide-substring string start end 'WIDE-SUBSTRING) - (%wide-substring string start end)) - -(define (%wide-substring string start end) - (let ((string* (make-wide-string (fix:- end start)))) - (let ((v1 (wide-string-contents string)) - (v2 (wide-string-contents string*))) - (do ((i start (fix:+ i 1)) - (j 0 (fix:+ j 1))) - ((not (fix:< i end))) - (vector-set! v2 j (vector-ref v1 i)))) - string*)) - -(define (wide-string-index? index string) - (and (index-fixnum? index) - (fix:< index (%wide-string-length string)))) - -(define-integrable (guarantee-wide-string-index index string caller) - (if (not (wide-string-index? index string)) - (error:not-wide-string-index index caller))) - -(define (error:not-wide-string-index index caller) - (error:wrong-type-argument index "a Unicode string index" caller)) - -(define-integrable (guarantee-wide-substring string start end caller) - (if (not (and (wide-string? string) - (index-fixnum? start) - (index-fixnum? end) - (fix:<= start end) - (fix:<= end (%wide-string-length string)))) - (guarantee-wide-substring/fail string start end caller))) - -(define (guarantee-wide-substring/fail string start end caller) - (guarantee wide-string? string caller) - (guarantee-limited-index end (%wide-string-length string) caller) - (guarantee-limited-index start end caller)) - -(define (string->wide-string string #!optional start end) - (%convert-string string start end - open-input-string - open-wide-output-string)) - -(define (wide-string->string string #!optional start end) - (%convert-string string start end - open-input-string - open-narrow-output-string)) - -(define (%convert-string string start end open-input open-output) - (let ((input (open-input string start end)) - (output (open-output))) - (let loop () - (let ((c (read-char input))) - (if (not (eof-object? c)) - (begin - (write-char c output) - (loop))))) - (get-output-string! output))) - -;;;; UTF-32 representation - -(define open-utf32-be-input-string - (coded-input-opener 'UTF-32BE)) - -(define open-utf32-le-input-string - (coded-input-opener 'UTF-32LE)) - -(define open-utf32-input-string - (ended-input-opener open-utf32-be-input-string - open-utf32-le-input-string)) - -(define call-with-utf32-be-input-string - (input-string-caller open-utf32-be-input-string)) - -(define call-with-utf32-le-input-string - (input-string-caller open-utf32-le-input-string)) - -(define call-with-utf32-input-string - (input-string-caller open-utf32-input-string)) - -(define open-utf32-be-output-string - (coded-output-opener 'UTF-32BE)) - -(define open-utf32-le-output-string - (coded-output-opener 'UTF-32LE)) - -(define open-utf32-output-string - (ended-output-opener open-utf32-be-output-string - open-utf32-le-output-string)) - -(define call-with-utf32-be-output-string - (output-string-caller open-utf32-be-output-string)) - -(define call-with-utf32-le-output-string - (output-string-caller open-utf32-le-output-string)) - -(define call-with-utf32-output-string - (output-string-caller open-utf32-output-string)) - -(define (utf32-string->wide-string string #!optional start end) - (if (host-big-endian?) - (utf32-be-string->wide-string string start end) - (utf32-le-string->wide-string string start end))) - -(define (utf32-be-string->wide-string string #!optional start end) - (%convert-string string start end - open-utf32-be-input-string - open-wide-output-string)) - -(define (utf32-le-string->wide-string string #!optional start end) - (%convert-string string start end - open-utf32-le-input-string - open-wide-output-string)) - -(define (string->utf32-string string #!optional start end) - (if (host-big-endian?) - (string->utf32-be-string string start end) - (string->utf32-le-string string start end))) - -(define (string->utf32-be-string string #!optional start end) - (%convert-string string start end - open-input-string - open-utf32-be-output-string)) - -(define (string->utf32-le-string string #!optional start end) - (%convert-string string start end - open-input-string - open-utf32-le-output-string)) - -(define (utf32-string-length string #!optional start end) - (if (host-big-endian?) - (utf32-be-string-length string start end) - (utf32-le-string-length string start end))) - -(define (utf32-be-string-length string #!optional start end) - (%utf32-string-length string start end "32BE" utf32-be-octets->code-point - 'UTF32-BE-STRING-LENGTH)) - -(define (utf32-le-string-length string #!optional start end) - (%utf32-string-length string start end "32LE" utf32-le-octets->code-point - 'UTF32-LE-STRING-LENGTH)) - -(define (%utf32-string-length string start end type combiner caller) - (with-substring-args string start end caller - (encoded-string-length string start end type caller - (lambda (string start end) - (validate-utf32-char string start end combiner))))) - -(define (utf32-string-valid? string #!optional start end) - (if (host-big-endian?) - (utf32-be-string-valid? string start end) - (utf32-le-string-valid? string start end))) - -(define (utf32-be-string-valid? string #!optional start end) - (%utf32-string-valid? string start end utf32-be-octets->code-point - 'UTF32-BE-STRING-VALID?)) - -(define (utf32-le-string-valid? string #!optional start end) - (%utf32-string-valid? string start end utf32-le-octets->code-point - 'UTF32-LE-STRING-VALID?)) - -(define (%utf32-string-valid? string start end combiner caller) - (with-substring-args string start end caller - (encoded-string-valid? string start end - (lambda (string start end) - (validate-utf32-char string start end combiner))))) - -(define-integrable (utf32-be-octets->code-point b0 b1 b2 b3) - (+ (* b0 #x01000000) - (fix:lsh b1 16) - (fix:lsh b2 8) - b3)) - -(define-integrable (utf32-le-octets->code-point b0 b1 b2 b3) - (+ (* b3 #x01000000) - (fix:lsh b2 16) - (fix:lsh b1 8) - b0)) - -(define (validate-utf32-char string start end combiner) - - (define-integrable (n i) - (vector-8b-ref string (fix:+ start i))) - - (if (fix:< start end) - (and (fix:<= (fix:+ start 4) end) - (legal-code-32? (combiner (n 0) (n 1) (n 2) (n 3))) - (fix:+ start 4)) - start)) - -(define (utf32-string? object) - (and (string? object) - (utf32-string-valid? object))) - -(define (utf32-be-string? object) - (and (string? object) - (utf32-be-string-valid? object))) - -(define (utf32-le-string? object) - (and (string? object) - (utf32-le-string-valid? object))) - -(define-guarantee utf32-string "UTF-32 string") -(define-guarantee utf32-be-string "UTF-32BE string") -(define-guarantee utf32-le-string "UTF-32LE string") - -;;;; UTF-16 representation - -(define open-utf16-be-input-string - (coded-input-opener 'UTF-16BE)) - -(define open-utf16-le-input-string - (coded-input-opener 'UTF-16LE)) - -(define open-utf16-input-string - (ended-input-opener open-utf16-be-input-string - open-utf16-le-input-string)) - -(define call-with-utf16-be-input-string - (input-string-caller open-utf16-be-input-string)) - -(define call-with-utf16-le-input-string - (input-string-caller open-utf16-le-input-string)) - -(define call-with-utf16-input-string - (input-string-caller open-utf16-input-string)) - -(define open-utf16-be-output-string - (coded-output-opener 'UTF-16BE)) - -(define open-utf16-le-output-string - (coded-output-opener 'UTF-16LE)) - -(define open-utf16-output-string - (ended-output-opener open-utf16-be-output-string - open-utf16-le-output-string)) - -(define call-with-utf16-be-output-string - (output-string-caller open-utf16-be-output-string)) - -(define call-with-utf16-le-output-string - (output-string-caller open-utf16-le-output-string)) - -(define call-with-utf16-output-string - (output-string-caller open-utf16-output-string)) - -(define (utf16-string->wide-string string #!optional start end) - (if (host-big-endian?) - (utf16-be-string->wide-string string start end) - (utf16-le-string->wide-string string start end))) - -(define (utf16-be-string->wide-string string #!optional start end) - (%convert-string string start end - open-utf16-be-input-string - open-wide-output-string)) - -(define (utf16-le-string->wide-string string #!optional start end) - (%convert-string string start end - open-utf16-le-input-string - open-wide-output-string)) - -(define (string->utf16-string string #!optional start end) - (if (host-big-endian?) - (string->utf16-be-string string start end) - (string->utf16-le-string string start end))) - -(define (string->utf16-be-string string #!optional start end) - (%convert-string string start end - open-input-string - open-utf16-be-output-string)) - -(define (string->utf16-le-string string #!optional start end) - (%convert-string string start end - open-input-string - open-utf16-le-output-string)) - -(define (utf16-string-length string #!optional start end) - (if (host-big-endian?) - (utf16-be-string-length string start end) - (utf16-le-string-length string start end))) - -(define (utf16-be-string-length string #!optional start end) - (%utf16-string-length string start end "16BE" be-octets->digit16 - 'UTF16-BE-STRING-LENGTH)) - -(define (utf16-le-string-length string #!optional start end) - (%utf16-string-length string start end "16LE" le-octets->digit16 - 'UTF16-LE-STRING-LENGTH)) - -(define (%utf16-string-length string start end type combiner caller) - (with-substring-args string start end caller - (encoded-string-length string start end type caller - (lambda (string start end) - (validate-utf16-char string start end combiner))))) - -(define (utf16-string-valid? string #!optional start end) - (if (host-big-endian?) - (utf16-be-string-valid? string start end) - (utf16-le-string-valid? string start end))) - -(define (utf16-be-string-valid? string #!optional start end) - (%utf16-string-valid? string start end be-octets->digit16 - 'UTF16-BE-STRING-VALID?)) - -(define (utf16-le-string-valid? string #!optional start end) - (%utf16-string-valid? string start end le-octets->digit16 - 'UTF16-LE-STRING-VALID?)) - -(define (%utf16-string-valid? string start end combiner caller) - (with-substring-args string start end caller - (encoded-string-valid? string start end - (lambda (string start end) - (validate-utf16-char string start end combiner))))) - -(define (validate-utf16-char string start end combiner) - - (define-integrable (n i) - (vector-8b-ref string (fix:+ start i))) - - (if (fix:< start end) - (and (fix:<= (fix:+ start 2) end) - (let ((d0 (combiner (n 0) (n 1)))) - (if (utf16-high-surrogate? d0) - (and (fix:<= (fix:+ start 4) end) - (utf16-low-surrogate? (combiner (n 2) (n 3))) - (fix:+ start 4)) - (and (legal-code-16? d0) - (fix:+ start 2))))) - start)) - -(define (be-octets->digit16 b0 b1) - (fix:or (fix:lsh b0 8) b1)) - -(define (le-octets->digit16 b0 b1) - (fix:or (fix:lsh b1 8) b0)) - -(define (combine-utf16-surrogates h l) - (guarantee utf16-high-surrogate? h 'combine-utf16-surrogates) - (guarantee utf16-low-surrogate? l 'combine-utf16-surrogates) - (fix:+ (fix:+ (fix:lsh (fix:and h #x3FF) 10) - (fix:and l #x3FF)) - #x10000)) - -(define (split-into-utf16-surrogates n) - (guarantee-unicode-scalar-value n 'split-into-utf16-surrogates) - (let ((n (fix:- n #x10000))) - (values (fix:or (fix:and (fix:lsh n -10) #x03FF) #xD800) - (fix:or (fix:and n #x03FF) #xDC00)))) - -(define (utf16-string? object) - (and (string? object) - (utf16-string-valid? object))) - -(define (utf16-be-string? object) - (and (string? object) - (utf16-be-string-valid? object))) - -(define (utf16-le-string? object) - (and (string? object) - (utf16-le-string-valid? object))) - -(define (utf16-high-surrogate? n) - (and (index-fixnum? n) - (fix:= #xD800 (fix:and #xFC00 n)))) - -(define (utf16-low-surrogate? n) - (and (index-fixnum? n) - (fix:= #xDC00 (fix:and #xFC00 n)))) - -(define-guarantee utf16-string "UTF-16 string") -(define-guarantee utf16-be-string "UTF-16BE string") -(define-guarantee utf16-le-string "UTF-16LE string") -(define-guarantee utf16-high-surrogate "UTF-16 high surrogate") -(define-guarantee utf16-low-surrogate "UTF-16 low surrogate") - -;;;; UTF-8 representation - -(define open-utf8-input-string - (coded-input-opener 'UTF-8)) - -(define call-with-utf8-input-string - (input-string-caller open-utf8-input-string)) - -(define open-utf8-output-string - (coded-output-opener 'UTF-8)) - -(define call-with-utf8-output-string - (output-string-caller open-utf8-output-string)) - -(define (string->utf8-string string #!optional start end) - (%convert-string string start end - open-input-string - open-utf8-output-string)) - -(define (utf8-string->string string #!optional start end) - (%convert-string string start end - open-utf8-input-string - open-narrow-output-string)) - -(define (utf8-string->wide-string string #!optional start end) - (%convert-string string start end - open-utf8-input-string - open-wide-output-string)) - -(define (utf8-string-length string #!optional start end) - (with-substring-args string start end 'UTF8-STRING-LENGTH - (encoded-string-length string start end "8" 'UTF8-STRING-LENGTH - validate-utf8-char))) - -(define (utf8-string-valid? string #!optional start end) - (with-substring-args string start end 'UTF8-STRING-VALID? - (encoded-string-valid? string start end validate-utf8-char))) - -(define (utf8-string? object) - (and (string? object) - (utf8-string-valid? object))) - -(define-guarantee utf8-string "UTF-8 string") - -(define (validate-utf8-char string start end) - - (define-integrable (check-byte i) - (%valid-trailer? (n i))) - - (define-integrable (n i) - (vector-8b-ref string (fix:+ start i))) - - (if (fix:< start end) - (let ((b0 (vector-8b-ref string start))) - (cond ((fix:< b0 #x80) - (fix:+ start 1)) - ((fix:< b0 #xE0) - (and (fix:<= (fix:+ start 2) end) - (check-byte 1) - (%vs2 b0) - (fix:+ start 2))) - ((fix:< b0 #xF0) - (and (fix:<= (fix:+ start 3) end) - (check-byte 1) - (check-byte 2) - (%vs3 b0 (n 1)) - (legal-code-16? (%cp3 b0 (n 1) (n 2))) - (fix:+ start 3))) - ((fix:< b0 #xF8) - (and (fix:<= (fix:+ start 4) end) - (check-byte 1) - (%vs4 b0 (n 1)) - (check-byte 2) - (check-byte 3) - (fix:+ start 4))) - (else #f))) - start)) - -(define-integrable (%vs2 b0) - (fix:> b0 #xC1)) - -(define-integrable (%vs3 b0 b1) - (or (fix:> b0 #xE0) (fix:> b1 #x9F))) - -(define-integrable (%vs4 b0 b1) - (or (fix:> b0 #xF0) (fix:> b1 #x8F))) - -(define-integrable (%cp3 b0 b1 b2) - (fix:or (fix:lsh (fix:and b0 #x0F) 12) - (fix:or (fix:lsh (fix:and b1 #x3F) 6) - (fix:and b2 #x3F)))) - -(define-integrable (%valid-trailer? n) - (fix:= #x80 (fix:and #xC0 n))) - -;;;; Per-character combination predicates - -(define (for-all-chars-in-string? predicate string #!optional start end coding) - (let ((port (open-string string start end coding 'FOR-ALL-CHARS-IN-STRING?))) - (let loop () - (let ((char (read-char port))) - (cond ((eof-object? char) #t) - ((predicate char) (loop)) - (else #f)))))) - -(define (for-any-char-in-string? predicate string #!optional start end coding) - (let ((port (open-string string start end coding 'FOR-ANY-CHAR-IN-STRING?))) - (let loop () - (let ((char (read-char port))) - (cond ((eof-object? char) #f) - ((predicate char) #t) - (else (loop))))))) - -(define (open-string string start end coding caller) - ((cond ((default-object? coding) - open-input-string) - ((string? string) - (case coding - ((UTF-8) open-utf8-input-string) - ((UTF-16) open-utf16-input-string) - ((UTF-16BE) open-utf16-be-input-string) - ((UTF-16LE) open-utf16-le-input-string) - ((UTF-32) open-utf32-input-string) - ((UTF-32BE) open-utf32-be-input-string) - ((UTF-32LE) open-utf32-le-input-string) - (else (error:bad-range-argument coding caller)))) - ((wide-string? string) - (error:bad-range-argument coding caller)) - (else - (error:wrong-type-argument string "string" caller))) - string start end)) \ No newline at end of file diff --git a/src/runtime/unxprm.scm b/src/runtime/unxprm.scm index a60fb76d7..2e0d684b6 100644 --- a/src/runtime/unxprm.scm +++ b/src/runtime/unxprm.scm @@ -471,7 +471,7 @@ USA. (define (os/parse-path-string string) (let ((end (ustring-length string)) - (substring + (extract (lambda (string start end) (pathname-as-directory (usubstring string start end))))) (let loop ((start 0)) @@ -480,9 +480,9 @@ USA. (if index (cons (if (= index start) #f - (usubstring string start index)) + (extract string start index)) (loop (+ index 1))) - (list (usubstring string start end)))) + (list (extract string start end)))) '())))) (define (os/shell-file-name) diff --git a/src/runtime/url.scm b/src/runtime/url.scm index 3295d4ad9..dea2b4212 100644 --- a/src/runtime/url.scm +++ b/src/runtime/url.scm @@ -46,8 +46,8 @@ USA. (if scheme (guarantee-uri-scheme scheme 'MAKE-URI)) (if authority (guarantee-uri-authority authority 'MAKE-URI)) (guarantee-uri-path path 'MAKE-URI) - (if query (guarantee utf8-string? query 'MAKE-URI)) - (if fragment (guarantee utf8-string? fragment 'MAKE-URI)) + (if query (guarantee ustring? query 'MAKE-URI)) + (if fragment (guarantee ustring? fragment 'MAKE-URI)) (if (and authority (pair? path) (path-relative? path)) (error:bad-range-argument path 'MAKE-URI)) (let* ((path (remove-dot-segments path)) @@ -91,7 +91,7 @@ USA. ;;; an empty segment. (define (uri-path? object) - (list-of-type? object utf8-string?)) + (list-of-type? object ustring?)) (define (uri-path-absolute? path) (guarantee-uri-path path 'URI-PATH-ABSOLUTE?) @@ -99,7 +99,7 @@ USA. (define (path-absolute? path) (and (pair? path) - (fix:= (string-length (car path)) 0))) + (fix:= 0 (ustring-length (car path))))) (define (uri-path-relative? path) (guarantee-uri-path path 'URI-PATH-RELATIVE?) @@ -136,10 +136,10 @@ USA. (define interned-uri-authorities) (define (uri-userinfo? object) - (utf8-string? object)) + (ustring? object)) (define (uri-host? object) - (utf8-string? object)) + (ustring? object)) (define (uri-port? object) (exact-nonnegative-integer? object)) @@ -184,10 +184,10 @@ USA. '())))) (define (uri-prefix prefix) - (guarantee utf8-string? prefix 'URI-PREFIX) + (guarantee ustring? prefix 'URI-PREFIX) (lambda (suffix) - (guarantee utf8-string? suffix 'URI-PREFIX) - (string->absolute-uri (string-append prefix suffix)))) + (guarantee ustring? suffix 'URI-PREFIX) + (string->absolute-uri (ustring-append prefix suffix)))) (define (remove-dot-segments path) ;; At all times, (APPEND INPUT (REVERSE OUTPUT)) must be well @@ -199,8 +199,8 @@ USA. (if (pair? input) (let ((segment (car input)) (input (cdr input))) - (if (or (string=? segment "..") - (string=? segment ".")) + (if (or (ustring=? segment "..") + (ustring=? segment ".")) ;; Rules A and D (no-output input) ;; Rule E @@ -211,10 +211,10 @@ USA. (if (pair? input) (let ((segment (car input)) (input (cdr input))) - (cond ((string=? segment ".") + (cond ((ustring=? segment ".") ;; Rule B (maybe-done input output)) - ((string=? segment "..") + ((ustring=? segment "..") ;; Rule C (maybe-done input (if (pair? (cdr output)) @@ -306,21 +306,17 @@ USA. (do-string (lambda (string) (or (hash-table/get interned-uris string #f) - (do-parse (utf8-string->wide-string string)))))) + (do-parse string))))) (cond ((uri? object) (if (predicate object) object (begin (if caller (error:bad-range-argument object caller)) #f))) - ((string? object) + ((ustring? object) (do-string object)) ((symbol? object) - (do-string (symbol-name object))) - ((wide-string? object) - (let ((string (string->utf8-string object))) - (or (hash-table/get interned-uris string #f) - (do-parse object)))) + (do-string (symbol->string object))) (else (if caller (error:not-uri object caller)) #f)))) @@ -335,7 +331,7 @@ USA. (%string->uri parse-relative-uri string start end 'STRING->RELATIVE-URI)) (define (%string->uri parser string start end caller) - (or (and (string? string) + (or (and (ustring? string) (default-object? start) (default-object? end) (hash-table/get interned-uris string #f)) @@ -427,7 +423,7 @@ USA. (define parser:hostport (*parser - (seq (map uri-string-downcase + (seq (map ustring-downcase (alt (match matcher:ip-literal) ;; subsumed by MATCHER:REG-NAME ;;matcher:ipv4-address @@ -438,20 +434,6 @@ USA. (match (+ (char-set char-set:uri-digit))))) (values #f))))) -;; This is a kludge to work around fact that STRING-DOWNCASE only -;; works on ISO 8859-1 strings, and we are using UTF-8 strings. - -(define (uri-string-downcase string) - (call-with-utf8-output-string - (lambda (output) - (let ((input (open-utf8-input-string string))) - (let loop () - (let ((char (read-char input))) - (if (not (eof-object? char)) - (begin - (write-char (char-downcase char) output) - (loop))))))))) - (define matcher:ip-literal (*matcher (seq "[" @@ -604,7 +586,6 @@ USA. (write-encoded segment char-set:uri-segment port)) (define (encode-uri-path-segment segment) - (guarantee-string segment 'ENCODE-URI-PATH-SEGMENT) (call-with-output-string (lambda (port) (write-segment segment port)))) @@ -632,41 +613,41 @@ USA. (char-set char-set:uri-hex)))) (define (decode-component string) - (if (string-find-next-char string #\%) + (if (ustring-find-first-char string #\%) (call-with-output-string (lambda (port) - (let ((end (string-length string))) + (let ((end (ustring-length string))) (let loop ((i 0)) (if (fix:< i end) - (if (char=? (string-ref string i) #\%) + (if (char=? #\% (ustring-ref string i)) (begin (write-char (integer->char - (substring->number string - (fix:+ i 1) - (fix:+ i 3) - 16 - #t)) + (string->number string + 16 + #t + (fix:+ i 1) + (fix:+ i 3))) port) (loop (fix:+ i 3))) (begin - (write-char (string-ref string i) port) + (write-char (ustring-ref string i) port) (loop (fix:+ i 1))))))))) string)) (define (write-encoded string unescaped port) - (write-encoded-substring string 0 (string-length string) unescaped port)) + (write-encoded-substring string 0 (ustring-length string) unescaped port)) (define (write-encoded-substring string start end unescaped port) (do ((i start (fix:+ i 1))) ((not (fix:< i end))) - (let ((char (string-ref string i))) + (let ((char (ustring-ref string i))) (if (char-set-member? unescaped char) (write-char char port) (begin (write-char #\% port) (write-string (string-pad-left - (string-upcase (number->string (char->integer char) - 16)) + (ustring-upcase (number->string (char->integer char) + 16)) 2 #\0) port)))))) @@ -1048,7 +1029,7 @@ USA. (actions (cdr clause))) `(,(cond ((eq? key 'EOF) `(EOF-OBJECT? CHAR)) - ((fix:= (string-length (symbol-name key)) 1) + ((fix:= 1 (string-length (symbol-name key))) `(CHAR=? CHAR ,(string-ref (symbol-name key) 0))) (else `(CHAR-SET-MEMBER? ,(symbol 'CHAR-SET:URI- key) CHAR))) diff --git a/src/runtime/ustring.scm b/src/runtime/ustring.scm index 8937d66f0..38ae03845 100644 --- a/src/runtime/ustring.scm +++ b/src/runtime/ustring.scm @@ -270,6 +270,12 @@ USA. (set-predicate<=! utf32-string? ustring?) (register-predicate! ->ustring-component? '->ustring-component)) +(define (make-ustring k #!optional char) + (guarantee index-fixnum? k 'make-ustring) + (if (fix:> k 0) + (make-utf32-string k char) + (make-legacy-string 0))) + (define (ustring-length string) (cond ((legacy-string? string) (legacy-string-length string)) ((utf32-string? string) (utf32-string-length string)) @@ -286,6 +292,13 @@ USA. (else (error:not-a ustring? string 'ustring-set!)))) (define (ustring-append . strings) + (%ustring-append* strings)) + +(define (ustring-append* strings) + (guarantee list? strings 'ustring-append*) + (%ustring-append* strings)) + +(define (%ustring-append* strings) (let ((string (do ((strings strings (cdr strings)) (n 0 (fix:+ n (ustring-length (car strings)))) @@ -676,10 +689,10 @@ USA. (%ustring* objects 'ustring*)) (define (%ustring* objects caller) - (apply ustring-append - (map (lambda (object) - (->ustring object caller)) - objects))) + (%ustring-append* + (map (lambda (object) + (->ustring object caller)) + objects))) (define (->ustring object caller) (cond ((not object) "") @@ -702,27 +715,4 @@ USA. (define (string-for-primitive string) (or (ustring->ascii string) - (string->utf8 string))) - -;; temporary scaffolding -(define (ustring->utf8-string string #!optional start end) - (let* ((caller 'ustring->utf8-string) - (end (fix:end-index end (ustring-length string) caller)) - (start (fix:start-index start end caller))) - (cond ((legacy-string? string) - (if (%legacy-string-ascii? string start end) - (legacy-string-copy string start end) - (%string->utf8-string string start end))) - ((utf32-string? string) - (if (%utf32-string-ascii? string start end) - (%utf32-string->ascii string start end) - (%string->utf8-string string start end))) - (else - (error:not-a ustring? string caller))))) - -(define (%string->utf8-string string start end) - (object-new-type (ucode-type string) (string->utf8 string start end))) - -;; temporary scaffolding -(define (utf8-string->ustring string #!optional start end) - (utf8->string (legacy-string->bytevector string) start end)) \ No newline at end of file + (string->utf8 string))) \ No newline at end of file diff --git a/src/xml/rdf-nt.scm b/src/xml/rdf-nt.scm index c29be444a..163e204e0 100644 --- a/src/xml/rdf-nt.scm +++ b/src/xml/rdf-nt.scm @@ -120,7 +120,7 @@ USA. (*parser (map intern (match match-language)))) (define (parse-string b) - (let ((port (open-utf8-output-string))) + (let ((port (open-output-string))) (define (loop) (let ((p (get-parser-buffer-pointer b))) @@ -228,7 +228,7 @@ USA. (write-string (symbol-name lang) port))))) (define (write-rdf/nt-literal-text text port) - (let ((text (open-utf8-input-string text))) + (let ((text (open-input-string text))) (write-string "\"" port) (let loop () (let ((char (read-char text))) diff --git a/src/xml/rdf-struct.scm b/src/xml/rdf-struct.scm index f44fb9bc5..3f4eb9233 100644 --- a/src/xml/rdf-struct.scm +++ b/src/xml/rdf-struct.scm @@ -202,7 +202,7 @@ USA. (define-guarantee rdf-literal "RDF literal") (define (make-rdf-literal text type) - (guarantee utf8-string? text 'MAKE-RDF-LITERAL) + (guarantee ustring? text 'MAKE-RDF-LITERAL) (let ((type (if (or (not type) (language? type)) @@ -322,8 +322,8 @@ USA. (define (make-rdf-qname prefix local) (guarantee-rdf-prefix prefix 'MAKE-RDF-QNAME) - (guarantee utf8-string? local 'MAKE-RDF-QNAME) - (if (not (*match-utf8-string match:name local)) + (guarantee ustring? local 'MAKE-RDF-QNAME) + (if (not (*match-string match:name local)) (error:bad-range-argument local 'MAKE-RDF-QNAME)) (symbol prefix local)) diff --git a/src/xml/turtle.scm b/src/xml/turtle.scm index 988b47863..7f7469fae 100644 --- a/src/xml/turtle.scm +++ b/src/xml/turtle.scm @@ -325,7 +325,7 @@ USA. (define (delimited-region-parser name start-delim end-delim char-set parse-escapes) (lambda (buffer) - (let ((output (open-utf8-output-string)) + (let ((output (open-output-string)) (start (get-parser-buffer-pointer buffer))) (define (read-head) @@ -766,18 +766,18 @@ USA. (else #f)))) ((rdf-bnode? o) (and (not (inline-bnode o)) - (call-with-utf8-output-string + (call-with-output-string (lambda (port) (write-rdf/nt-bnode o port))))) ((uri? o) - (call-with-utf8-output-string + (call-with-output-string (lambda (port*) (write-uri o (port/rdf-prefix-registry port) port*)))) ((rdf-graph? o) (and (null? (rdf-graph-triples o)) "{}")) ((rdf-literal? o) - (call-with-utf8-output-string + (call-with-output-string (lambda (port) (write-rdf/turtle-literal o port)))) (else @@ -912,7 +912,7 @@ USA. (define (write-literal-text text port) (if (string-find-next-char text #\newline) - (let ((tport (open-utf8-input-string text))) + (let ((tport (open-input-string text))) (write-string "\"\"\"" port) (let loop () (let ((char (read-char tport))) diff --git a/src/xml/xml-names.scm b/src/xml/xml-names.scm index 3f654b455..5e57c9070 100644 --- a/src/xml/xml-names.scm +++ b/src/xml/xml-names.scm @@ -130,7 +130,7 @@ USA. (define (string-matcher matcher) (lambda (string #!optional start end) - (matcher (utf8-string->parser-buffer string start end)))) + (matcher (string->parser-buffer string start end)))) (define string-is-xml-qname? (string-matcher match:xml-qname)) (define string-is-xml-name? (string-matcher match:xml-name)) diff --git a/src/xml/xml-output.scm b/src/xml/xml-output.scm index 8194450b8..0c36854d8 100644 --- a/src/xml/xml-output.scm +++ b/src/xml/xml-output.scm @@ -44,11 +44,6 @@ USA. (set-coding xml port) (write-xml-1 xml port options)))) -(define (xml->wide-string xml . options) - (call-with-wide-output-string - (lambda (port) - (write-xml-1 xml port options)))) - (define (set-coding xml port) (if (port/supports-coding? port) (let ((coding @@ -91,9 +86,9 @@ USA. (define (emit-string string ctx) (let ((port (ctx-port ctx))) - (for-each-unicode-char string - (lambda (char) - (write-char char port))))) + (ustring-for-each (lambda (char) + (write-char char port)) + string))) (define (emit-newline ctx) (newline (ctx-port ctx))) @@ -414,23 +409,23 @@ USA. (define (xml-string-columns string) (let ((n 0)) - (for-each-unicode-char string - (lambda (char) - (set! n - (fix:+ n - (case char - ((#\") 6) - ((#\<) 4) - ((#\&) 5) - (else 1)))) - unspecific)) + (ustring-for-each (lambda (char) + (set! n + (fix:+ n + (case char + ((#\") 6) + ((#\<) 4) + ((#\&) 5) + (else 1)))) + unspecific) + string) n)) (define (write-xml-name name ctx) (emit-string (xml-name-string name) ctx)) (define (xml-name-columns name) - (utf8-string-length (xml-name-string name))) + (ustring-length (xml-name-string name))) (define (write-xml-nmtoken nmtoken ctx) (emit-string (symbol-name nmtoken) ctx)) @@ -487,24 +482,15 @@ USA. (emit-char #\space ctx))) (define (write-escaped-string string escapes ctx) - (for-each-unicode-char string - (lambda (char) - (cond ((assq char escapes) - => (lambda (e) - (emit-string (cdr e) ctx))) - (((ctx-char-map ctx) char) - => (lambda (name) - (emit-char #\& ctx) - (emit-string (symbol-name name) ctx) - (emit-char #\; ctx))) - (else - (emit-char char ctx)))))) - -(define (for-each-unicode-char string procedure) - (let ((port (open-utf8-input-string string))) - (let loop () - (let ((char (read-char port))) - (if (not (eof-object? char)) - (begin - (procedure char) - (loop))))))) \ No newline at end of file + (ustring-for-each (lambda (char) + (cond ((assq char escapes) + => (lambda (e) + (emit-string (cdr e) ctx))) + (((ctx-char-map ctx) char) + => (lambda (name) + (emit-char #\& ctx) + (emit-string (symbol-name name) ctx) + (emit-char #\; ctx))) + (else + (emit-char char ctx)))) + string)) \ No newline at end of file diff --git a/src/xml/xml-parser.scm b/src/xml/xml-parser.scm index 316d22bf4..658d8cb54 100644 --- a/src/xml/xml-parser.scm +++ b/src/xml/xml-parser.scm @@ -94,11 +94,6 @@ USA. 'ANY) (guarantee-pi-handlers pi-handlers 'STRING->XML))) -(define (utf8-string->xml string #!optional start end pi-handlers) - (parse-xml (utf8-string->parser-buffer string start end) - 'UTF-8 - (guarantee-pi-handlers pi-handlers 'UTF8-STRING->XML))) - (define (guarantee-pi-handlers object caller) (if (default-object? object) '() @@ -136,7 +131,7 @@ USA. (char->integer c)))) (prefix (lambda (n) - (wide-string (integer->char n)))) + (ustring (integer->char n)))) (lose (lambda bytes (error "Illegal starting bytes:" bytes)))) @@ -679,7 +674,7 @@ USA. (let ((char (integer->char n))) (if (not (char-set-member? char-set:xml-char char)) (perror p "Disallowed Unicode character" char)) - (call-with-utf8-output-string + (call-with-output-string (lambda (port) (write-char char port)))))))) (*parser @@ -825,10 +820,10 @@ USA. ;;;; Normalization (define (normalize-attribute-value string) - (call-with-utf8-output-string + (call-with-output-string (lambda (port) (let normalize-string ((string string)) - (let ((b (utf8-string->parser-buffer (normalize-line-endings string)))) + (let ((b (string->parser-buffer (normalize-line-endings string)))) (let loop () (let* ((p (get-parser-buffer-pointer b)) (char (read-parser-buffer-char b))) @@ -859,7 +854,7 @@ USA. (loop)))))))))) (define (trim-attribute-whitespace string) - (call-with-utf8-output-string + (call-with-output-string (lambda (port) (let ((string (string-trim string))) (let ((end (string-length string))) @@ -988,7 +983,7 @@ USA. (let ((v (expand-entity-value name p (lambda () - (*parse-utf8-string parse-content string))))) + (*parse-string parse-content string))))) (if (not v) (perror p "Malformed entity reference" string)) v)) @@ -1325,7 +1320,7 @@ USA. (string? (vector-ref v 0))) (let ((v* (fluid-let ((*external-expansion?* #t)) - (*parse-utf8-string parser (vector-ref v 0))))) + (*parse-string parser (vector-ref v 0))))) (if (not v*) (perror ptr (string-append "Malformed " description) diff --git a/src/xml/xml-rpc.scm b/src/xml/xml-rpc.scm index 2307499fd..53f913418 100644 --- a/src/xml/xml-rpc.scm +++ b/src/xml/xml-rpc.scm @@ -313,8 +313,7 @@ USA. 'encode-value)))))) (define (encode-string string) - (if (and (utf8-string-valid? string) - (string-of-xml-chars? string)) + (if (string-of-xml-chars? string) string (rpc-elt:base64 (call-with-output-string diff --git a/src/xml/xml-struct.scm b/src/xml/xml-struct.scm index b5d88ceea..b47c7ecb4 100644 --- a/src/xml/xml-struct.scm +++ b/src/xml/xml-struct.scm @@ -164,34 +164,25 @@ USA. (define (xml-char-data? object) (or (unicode-char? object) - (and (or (wide-string? object) - (and (string? object) - (utf8-string-valid? object))) + (and (ustring? object) (string-of-xml-chars? object)))) (define (string-of-xml-chars? string) - (for-all-chars-in-string? (char-set-predicate char-set:xml-char) - string - 0 - (string-length string) - 'UTF-8)) + (ustring-every (char-set-predicate char-set:xml-char) string)) (define (canonicalize-char-data object) (cond ((unicode-char? object) - (call-with-utf8-output-string + (call-with-output-string (lambda (port) (write-char object port)))) - ((wide-string? object) - (string->utf8-string object)) - ((string? object) - (cond ((not (utf8-string-valid? object)) - (error:wrong-type-datum object "valid UTF-8 XML char data")) - ((not (string-of-xml-chars? object)) - (error:wrong-type-datum object "well-formed XML char data")) - (else object))) + ((ustring? object) + (if (not (string-of-xml-chars? object)) + (error:wrong-type-datum object "well-formed XML char data")) + object) ((uri? object) (uri->string object)) - (else (error:wrong-type-datum object "an XML char data")))) + (else + (error:wrong-type-datum object "an XML char data")))) (define-xml-type element (name xml-name?) @@ -520,20 +511,19 @@ USA. (define (xml-comment . strings) (make-xml-comment - (let* ((s (apply string-append (map canonicalize-char-data strings))) - (ws (utf8-string->wide-string s)) - (n (wide-string-length ws))) + (let* ((s (apply ustring-append (map canonicalize-char-data strings))) + (n (ustring-length s))) (if (fix:> n 0) - (string-append - (if (char-whitespace? (wide-string-ref ws 0)) "" " ") + (ustring-append + (if (char-whitespace? (ustring-ref s 0)) "" " ") s - (if (char-whitespace? (wide-string-ref ws (fix:- n 1))) "" " ")) + (if (char-whitespace? (ustring-ref s (fix:- n 1))) "" " ")) " ")))) (define (xml-stylesheet . items) (make-xml-processing-instructions 'xml-stylesheet - (call-with-utf8-output-string + (call-with-output-string (lambda (port) (for-each (lambda (attr) (write-char #\space port) diff --git a/src/xml/xml.pkg b/src/xml/xml.pkg index eb6b77cf7..9c53b1dd9 100644 --- a/src/xml/xml.pkg +++ b/src/xml/xml.pkg @@ -304,7 +304,6 @@ USA. read-xml read-xml-file string->xml - utf8-string->xml xml-processing-instructions-handlers) (export (runtime xml) coding-requires-bom? @@ -317,8 +316,7 @@ USA. (xml->string xml->octets) write-xml write-xml-file - xml->octets - xml->wide-string)) + xml->octets)) (define-package (runtime xml html) (files "xhtml" "xhtml-entities") diff --git a/src/xml/xpath.scm b/src/xml/xpath.scm index 0e33bc1e9..eb7f1efc0 100644 --- a/src/xml/xpath.scm +++ b/src/xml/xpath.scm @@ -180,7 +180,7 @@ USA. (xml-element-name (node-item node))) (define-method node-string ((node )) - (call-with-utf8-output-string + (call-with-output-string (lambda (port) (let loop ((node node)) (stream-for-each (lambda (child) -- 2.25.1