From aafeee81eea3921e043d0332314eb4e44da176fa Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Wed, 25 Jan 2017 00:40:54 -0800 Subject: [PATCH] Initial draft of new string implementation. --- src/runtime/runtime.pkg | 105 ++++++++---- src/runtime/string.scm | 31 ---- src/runtime/xstring.scm | 353 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 430 insertions(+), 59 deletions(-) create mode 100644 src/runtime/xstring.scm diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index 6d4dd39bc..9bfea3364 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -973,17 +973,54 @@ USA. (export () ;; BEGIN deprecated bindings (guarantee-vector-8b guarantee-string) + (set-vector-8b-length! set-string-length!) + (vector-8b-length string-length) + (vector-8b-maximum-length string-maximum-length) + (vector-8b? string?) error:not-string error:not-xstring guarantee-string guarantee-string-index guarantee-xstring - legacy-string? + hexadecimal->vector-8b + make-vector-8b + vector-8b->hexadecimal + vector-8b-fill! + vector-8b-find-next-char + vector-8b-find-next-char-ci + vector-8b-find-previous-char + vector-8b-find-previous-char-ci + vector-8b-ref + vector-8b-set! ;; END deprecated bindings - (set-vector-8b-length! set-string-length!) - (vector-8b-length string-length) - (vector-8b-maximum-length string-maximum-length) - (vector-8b? string?) + (legacy-string string) + (legacy-string->list string->list) + (legacy-string->vector string->vector) + (legacy-string-append string-append) + (legacy-string-ci<=? string-ci<=?) + (legacy-string-ci=? string-ci>=?) + (legacy-string-ci>? string-ci>?) + (legacy-string-copy string-copy) + (legacy-string-copy! string-copy!) + (legacy-string-downcase string-downcase) + (legacy-string-fill! string-fill!) + (legacy-string-for-each string-for-each) + (legacy-string-length string-length) + (legacy-string-map string-map) + (legacy-string-ref string-ref) + (legacy-string-set! string-set!) + (legacy-string-upcase string-upcase) + (legacy-string<=? string<=?) + (legacy-string=? string>=?) + (legacy-string>? string>?) + (legacy-string? string?) + (legacy-substring substring) + (list->legacy-string list->string) + (make-legacy-string make-string) ascii-string-copy burst-string camel-case-string->lisp @@ -992,11 +1029,9 @@ USA. guarantee-substring guarantee-substring-end-index guarantee-substring-start-index - hexadecimal->vector-8b lisp-string->camel-case list->string make-string - make-vector-8b reverse-string reverse-string! reverse-substring @@ -1107,21 +1142,7 @@ USA. substring=? substring? utf8-string - vector-8b->hexadecimal - vector-8b-fill! - vector-8b-find-next-char - vector-8b-find-next-char-ci - vector-8b-find-previous-char - vector-8b-find-previous-char-ci - vector-8b-ref - vector-8b-set! - xstring-fill! - xstring-length xstring-move! - xstring-ref - xstring-set! - xstring? - xsubstring xsubstring-fill! xsubstring-find-next-char xsubstring-find-next-char-ci @@ -1130,10 +1151,42 @@ USA. xsubstring-find-previous-char-ci xsubstring-find-previous-char-in-set xsubstring-move!) - (export (runtime generic-i/o-port) - %substring-move!) (initialization (initialize-package!))) +(define-package (runtime xstring) + (files "xstring") + (parent (runtime)) + (export () + (list->xstring list->utf32-string) + (make-xstring make-utf32-string) + (xstring utf32-string) + (xsubstring xstring-copy) + xstring->list + xstring->vector + xstring-append + ;; xstring-ci<=? + ;; xstring-ci=? + ;; xstring-ci>? + xstring-copy + xstring-copy! + ;; xstring-downcase + xstring-fill! + ;; xstring-foldcase + xstring-for-each + xstring-length + xstring-map + xstring-ref + xstring-set! + ;; xstring-upcase + ;; xstring<=? + ;; xstring=? + ;; xstring>? + xstring?)) + (define-package (runtime bytevector) (files "bytevector") (parent (runtime)) @@ -5747,11 +5800,7 @@ USA. wide-string-ref wide-string-set! wide-string? - wide-substring) - (export (runtime generic-i/o-port) - wide-string-contents) - (export (runtime input-port) - wide-string-contents)) + wide-substring)) (define-package (runtime uri) (files "url") diff --git a/src/runtime/string.scm b/src/runtime/string.scm index 30c3a4121..7bc8efd6b 100644 --- a/src/runtime/string.scm +++ b/src/runtime/string.scm @@ -1643,25 +1643,6 @@ USA. (outer k (fix:+ q 1))))) pi)) -(define (xstring? object) - (or (string? object) - (wide-string? object))) - -(define (xstring-length string) - (cond ((string? string) (string-length string)) - ((wide-string? string) (wide-string-length string)) - (else (error:not-xstring string 'XSTRING-LENGTH)))) - -(define (xstring-ref string index) - (cond ((string? string) (string-ref string index)) - ((wide-string? string) (wide-string-ref string index)) - (else (error:not-xstring string 'XSTRING-REF)))) - -(define (xstring-set! string index char) - (cond ((string? string) (string-set! string index char)) - ((wide-string? string) (wide-string-set! string index char)) - (else (error:not-xstring string 'XSTRING-SET!)))) - (define (xstring-move! xstring1 xstring2 start2) (xsubstring-move! xstring1 0 (xstring-length xstring1) xstring2 start2)) @@ -1673,18 +1654,6 @@ USA. (substring-move-right! xstring1 start1 end1 xstring2 start2)))) -(define (xsubstring xstring start end) - (guarantee-xsubstring xstring start end 'XSUBSTRING) - (let ((string (make-string (- end start)))) - (xsubstring-move! xstring start end string 0) - string)) - -(define (xstring-fill! xstring char) - (cond ((string? xstring) - (string-fill! xstring char)) - (else - (error:not-xstring xstring 'XSTRING-FILL!)))) - (define (xsubstring-fill! xstring start end char) (cond ((string? xstring) (substring-fill! xstring start end char)) diff --git a/src/runtime/xstring.scm b/src/runtime/xstring.scm new file mode 100644 index 000000000..1c07c76d4 --- /dev/null +++ b/src/runtime/xstring.scm @@ -0,0 +1,353 @@ +#| -*-Scheme-*- + +Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, + 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, + 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, + 2017 Massachusetts Institute of Technology + +This file is part of MIT/GNU Scheme. + +MIT/GNU Scheme is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at +your option) any later version. + +MIT/GNU Scheme is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with MIT/GNU Scheme; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, +USA. + +|# + +;;;; Extended strings +;;; package: (runtime xstring) + +;;; This implementation supports all R7RS string operations in which all the +;;; names have "string" replaced by "xstring". This is a transitional +;;; implementation to convert MIT/GNU Scheme to full Unicode string support. +;;; +;;; At some point in the future we'll renaming everything back to "string". + +;; get-output-xstring +;; number->xstring +;; open-output-xstring +;; read-xstring +;; symbol->xstring +;; utf8->xstring +;; vector->xstring +;; write-xstring +;; xstring->number +;; xstring->symbol +;; xstring->utf8 + +(declare (usual-integrations)) + +;;;; U32 vector + +(define-integrable (u32->byte-index index) + (fix:* index 4)) + +(define-integrable (byte->u32-index index) + (fix:quotient index 4)) + +(define (make-u32-vector length) + (make-bytevector (u32->byte-index length))) + +(define (u32-vector-length bytes) + (byte->u32-index (bytevector-length bytes))) + +(define (u32-vector-ref bytes index) + (bytevector-u32be-ref bytes (u32->byte-index index))) + +(define (u32-vector-set! bytes index u32) + (bytevector-u32be-set! bytes (u32->byte-index index) u32)) + +(define (u32-vector-copy! to at from start end) + (bytevector-copy! to (u32->byte-index to) + from (u32->byte-index start) (u32->byte-index end))) + +(define (u32-vector-fill! bytes start end u32) + (do ((i start (fix:+ i 1))) + ((not (fix:< i end))) + (u32-vector-set! bytes i u32))) + +;;;; UTF-32 string + +(define (make-utf32-string k #!optional char) + (let ((v (make-u32-vector k))) + (if (not (default-object? char)) + (u32-vector-fill! v 0 k (char->integer char))) + (%make-utf32-string v))) + +(define (utf32-string . chars) + (list->utf32-string chars)) + +(define (list->utf32-string chars) + (let ((v (make-u32-vector (length chars)))) + (do ((chars chars (cdr chars)) + (i 0 (fix:+ i 1))) + ((not (pair? chars))) + (u32-vector-set! v i (char->integer (car chars)))) + (%make-utf32-string v))) + +(define-record-type + (%make-utf32-string vector) + utf32-string? + (vector utf32-string-vector)) + +(define (utf32-string-length string) + (u32-vector-length (utf32-string-vector string))) + +(define (utf32-string-ref string index) + (integer->char (u32-vector-ref (utf32-string-vector string) index))) + +(define (utf32-string-set! string index char) + (u32-vector-set! (utf32-string-vector string) + index + (char->integer char))) + +(define (utf32-string-copy string #!optional start end) + (let* ((end (get-end end (utf32-string-length string) 'utf32-string-copy)) + (start (get-start start end 'utf32-string-copy)) + (to (make-utf32-string (fix:- end start)))) + (%utf32-string-copy! to 0 string start end) + to)) + +(define (utf32-string-copy! to at from #!optional start end) + (let* ((end (get-end end (utf32-string-length from) 'utf32-string-copy!)) + (start (get-start start end 'utf32-string-copy!))) + (%utf32-string-copy! to at from start end))) + +(define-integrable (%utf32-string-copy! to at from start end) + (u32-vector-copy! (utf32-string-vector to) at + (utf32-string-vector from) start end)) + +(define (utf32-string-fill! string char #!optional start end) + (let* ((end (get-end end (utf32-string-length string) 'utf32-string-fill!)) + (start (get-start start end 'utf32-string-fill!))) + (u32-vector-fill! (utf32-string-vector string) start end + (char->integer char)))) + +(define (utf32-string->list string #!optional start end) + (let* ((end (get-end end (utf32-string-length string) 'utf32-string->list)) + (start (get-start start end 'utf32-string->list))) + (do ((i (fix:- end 1) (fix:- i 1)) + (chars '() (cons (utf32-string-ref string i) chars))) + ((not (fix:>= i start)) chars)))) + +(define (utf32-string->vector string #!optional start end) + (let* ((end (get-end end (utf32-string-length string) 'utf32-string->vector)) + (start (get-start start end 'utf32-string->vector)) + (v (make-vector (fix:- end start)))) + (do ((i start (fix:+ i 1))) + ((not (fix:< i end))) + (vector-set! v i (utf32-string-ref string i))) + v)) + +(define (utf32-string-map proc string . strings) + (if (null? strings) + (let* ((n (utf32-string-length string)) + (result (make-utf32-string n))) + (do ((i 0 (fix:+ i 1))) + ((not (fix:< i n))) + (utf32-string-set! result i (proc (utf32-string-ref string i)))) + result) + (let* ((n (min-length utf32-string-length string strings)) + (result (make-utf32-string n))) + (do ((i 0 (fix:+ i 1))) + ((not (fix:< i n))) + (utf32-string-set! result i + (apply proc + (utf32-string-ref string i) + (map (lambda (string) + (utf32-string-ref string i)) + strings)))) + result))) + +(define (utf32-string-for-each procedure string . strings) + (if (null? strings) + (let ((n (utf32-string-length string))) + (do ((i 0 (fix:+ i 1))) + ((not (fix:< i n))) + (procedure (utf32-string-ref string i)))) + (let ((n (min-length utf32-string-length string strings))) + (do ((i 0 (fix:+ i 1))) + ((not (fix:< i n))) + (apply procedure + (utf32-string-ref string i) + (map (lambda (string) + (utf32-string-ref string i)) + strings)))))) + +;;;; String + +(define (xstring? object) + (or (legacy-string? object) + (utf32-string? object))) + +(define (create-xstring-registrations!) + (register-predicate! legacy-string? 'legacy-string) + (register-predicate! utf32-string? 'utf32-string) + (register-predicate! xstring? 'xstring + '<= legacy-string? + '<= utf32-string?)) + +(define (xstring-append . strings) + (let ((string + (make-utf32-string + (do ((strings strings (cdr strings)) + (n 0 (fix:+ n (xstring-length (car strings))))) + ((not (pair? strings)) n))))) + (let loop ((strings strings) (i 0)) + (if (pair? strings) + (let ((n (xstring-length (car strings)))) + (xstring-copy! string i (car strings) 0 n) + (loop (cdr strings) (fix:+ i n))))) + string)) + +(define (xstring-length string) + (cond ((legacy-string? string) (legacy-string-length string)) + ((utf32-string? string) (utf32-string-length string)) + (else (error:not-a xstring? string 'xstring-length)))) + +(define (xstring-ref string index) + (cond ((legacy-string? string) (legacy-string-ref string index)) + ((utf32-string? string) (utf32-string-ref string index)) + (else (error:not-a xstring? string 'xstring-ref)))) + +(define (xstring-set! string index char) + (cond ((legacy-string? string) (legacy-string-set! string index char)) + ((utf32-string? string) (utf32-string-set! string index char)) + (else (error:not-a xstring? string 'xstring-set!)))) + +(define (xstring-copy string #!optional start end) + (cond ((legacy-string? string) (legacy-string-copy string start end)) + ((utf32-string? string) (utf32-string-copy string start end)) + (else (error:not-a xstring? string 'xstring-copy)))) + +(define (xstring-copy! to at from #!optional start end) + (cond ((legacy-string? to) + (cond ((legacy-string? from) + (legacy-string-copy! to at from start end)) + ((utf32-string? from) + (utf32->legacy-copy! to at from start end)) + (else + (error:not-a xstring? from 'xstring-copy!)))) + ((utf32-string? to) + (cond ((legacy-string? from) + (legacy->utf32-copy! to at from start end)) + ((utf32-string? from) + (utf32-string-copy! to at from start end)) + (else + (error:not-a xstring? from 'xstring-copy!)))) + (else + (error:not-a xstring? to 'xstring-copy!)))) + +(define (utf32->legacy-copy! to at from #!optional start end) + (let* ((end (get-end end (utf32-string-length from) 'xstring-copy!)) + (start (get-start start end 'xstring-copy!))) + (do ((i start (fix:+ i 1)) + (j at (fix:+ j 1))) + ((not (fix:< i end))) + (legacy-string-set! to j (utf32-string-ref from i))))) + +(define (legacy->utf32-copy! to at from #!optional start end) + (let* ((end (get-end end (legacy-string-length from) 'xstring-copy!)) + (start (get-start start end 'xstring-copy!))) + (do ((i start (fix:+ i 1)) + (j at (fix:+ j 1))) + ((not (fix:< i end))) + (utf32-string-set! to j (legacy-string-ref from i))))) + +(define (xstring-fill! string char #!optional start end) + (cond ((legacy-string? string) (legacy-string-fill! string char start end)) + ((utf32-string? string) (utf32-string-fill! string char start end)) + (else (error:not-a xstring? string 'xstring-fill!)))) + +(define (xstring->list string #!optional start end) + (cond ((legacy-string? string) (legacy-string->list string start end)) + ((utf32-string? string) (utf32-string->list string start end)) + (else (error:not-a xstring? string 'xstring->list)))) + +(define (xstring->vector string #!optional start end) + (cond ((legacy-string? string) (legacy-string->vector string start end)) + ((utf32-string? string) (utf32-string->vector string start end)) + (else (error:not-a xstring? string 'xstring->vector)))) + +(define (xstring-for-each procedure string . strings) + (if (null? strings) + (let ((n (xstring-length string))) + (do ((i 0 (fix:+ i 1))) + ((not (fix:< i n))) + (procedure (xstring-ref string i)))) + (let ((n (min-length xstring-length string strings))) + (do ((i 0 (fix:+ i 1))) + ((not (fix:< i n))) + (apply procedure + (xstring-ref string i) + (map (lambda (string) + (xstring-ref string i)) + strings)))))) + +(define (xstring-map proc string . strings) + (if (null? strings) + (let* ((n (xstring-length string)) + (result (make-utf32-string n))) + (do ((i 0 (fix:+ i 1))) + ((not (fix:< i n))) + (utf32-string-set! result i (proc (xstring-ref string i)))) + result) + (let* ((n (min-length xstring-length string strings)) + (result (make-utf32-string n))) + (do ((i 0 (fix:+ i 1))) + ((not (fix:< i n))) + (utf32-string-set! result i + (apply proc + (xstring-ref string i) + (map (lambda (string) + (xstring-ref string i)) + strings)))) + result))) + +;; (xstring-ci<=? string1 string2 . strings) +;; (xstring-ci=? string1 string2 . strings) +;; (xstring-ci>? string1 string2 . strings) +;; (xstring-downcase string) +;; (xstring-foldcase string) +;; (xstring-upcase string) +;; (xstring<=? string1 string2 . strings) +;; (xstring=? string1 string2 . strings) +;; (xstring>? string1 string2 . strings) + +(define (get-end end length caller) + (if (default-object? end) + length + (begin + (guarantee index-fixnum? end caller) + (if (not (fix:<= end length)) + (error:bad-range-argument end caller)) + end))) + +(define (get-start start end caller) + (if (default-object? start) + 0 + (begin + (guarantee index-fixnum? start caller) + (if (not (fix:<= start end)) + (error:bad-range-argument start caller)) + start))) + +(define (min-length string-length string strings) + (do ((strings strings (cdr strings)) + (n (string-length string) + (fix:min n (string-length (car strings))))) + ((null? strings) n))) \ No newline at end of file -- 2.25.1