From aafeee81eea3921e043d0332314eb4e44da176fa Mon Sep 17 00:00:00 2001
From: Chris Hanson <org/chris-hanson/cph>
Date: Wed, 25 Jan 2017 00:40:54 -0800
Subject: [PATCH] Initial draft of new string implementation.

---
 src/runtime/runtime.pkg | 105 ++++++++----
 src/runtime/string.scm  |  31 ----
 src/runtime/xstring.scm | 353 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 430 insertions(+), 59 deletions(-)
 create mode 100644 src/runtime/xstring.scm

diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg
index 6d4dd39bc..9bfea3364 100644
--- a/src/runtime/runtime.pkg
+++ b/src/runtime/runtime.pkg
@@ -973,17 +973,54 @@ USA.
   (export ()
 	  ;; BEGIN deprecated bindings
 	  (guarantee-vector-8b guarantee-string)
+	  (set-vector-8b-length! set-string-length!)
+	  (vector-8b-length string-length)
+	  (vector-8b-maximum-length string-maximum-length)
+	  (vector-8b? string?)
 	  error:not-string
 	  error:not-xstring
 	  guarantee-string
 	  guarantee-string-index
 	  guarantee-xstring
-	  legacy-string?
+	  hexadecimal->vector-8b
+	  make-vector-8b
+	  vector-8b->hexadecimal
+	  vector-8b-fill!
+	  vector-8b-find-next-char
+	  vector-8b-find-next-char-ci
+	  vector-8b-find-previous-char
+	  vector-8b-find-previous-char-ci
+	  vector-8b-ref
+	  vector-8b-set!
 	  ;; END deprecated bindings
-	  (set-vector-8b-length! set-string-length!)
-	  (vector-8b-length string-length)
-	  (vector-8b-maximum-length string-maximum-length)
-	  (vector-8b? string?)
+	  (legacy-string string)
+	  (legacy-string->list string->list)
+	  (legacy-string->vector string->vector)
+	  (legacy-string-append string-append)
+	  (legacy-string-ci<=? string-ci<=?)
+	  (legacy-string-ci<? string-ci<?)
+	  (legacy-string-ci=? string-ci=?)
+	  (legacy-string-ci>=? string-ci>=?)
+	  (legacy-string-ci>? string-ci>?)
+	  (legacy-string-copy string-copy)
+	  (legacy-string-copy! string-copy!)
+	  (legacy-string-downcase string-downcase)
+	  (legacy-string-fill! string-fill!)
+	  (legacy-string-for-each string-for-each)
+	  (legacy-string-length string-length)
+	  (legacy-string-map string-map)
+	  (legacy-string-ref string-ref)
+	  (legacy-string-set! string-set!)
+	  (legacy-string-upcase string-upcase)
+	  (legacy-string<=? string<=?)
+	  (legacy-string<? string<?)
+	  (legacy-string=? string=?)
+	  (legacy-string>=? string>=?)
+	  (legacy-string>? string>?)
+	  (legacy-string? string?)
+	  (legacy-substring substring)
+	  (list->legacy-string list->string)
+	  (make-legacy-string make-string)
 	  ascii-string-copy
 	  burst-string
 	  camel-case-string->lisp
@@ -992,11 +1029,9 @@ USA.
 	  guarantee-substring
 	  guarantee-substring-end-index
 	  guarantee-substring-start-index
-	  hexadecimal->vector-8b
 	  lisp-string->camel-case
 	  list->string
 	  make-string
-	  make-vector-8b
 	  reverse-string
 	  reverse-string!
 	  reverse-substring
@@ -1107,21 +1142,7 @@ USA.
 	  substring=?
 	  substring?
 	  utf8-string
-	  vector-8b->hexadecimal
-	  vector-8b-fill!
-	  vector-8b-find-next-char
-	  vector-8b-find-next-char-ci
-	  vector-8b-find-previous-char
-	  vector-8b-find-previous-char-ci
-	  vector-8b-ref
-	  vector-8b-set!
-	  xstring-fill!
-	  xstring-length
 	  xstring-move!
-	  xstring-ref
-	  xstring-set!
-	  xstring?
-	  xsubstring
 	  xsubstring-fill!
 	  xsubstring-find-next-char
 	  xsubstring-find-next-char-ci
@@ -1130,10 +1151,42 @@ USA.
 	  xsubstring-find-previous-char-ci
 	  xsubstring-find-previous-char-in-set
 	  xsubstring-move!)
-  (export (runtime generic-i/o-port)
-	  %substring-move!)
   (initialization (initialize-package!)))
 
+(define-package (runtime xstring)
+  (files "xstring")
+  (parent (runtime))
+  (export ()
+	  (list->xstring list->utf32-string)
+	  (make-xstring make-utf32-string)
+	  (xstring utf32-string)
+	  (xsubstring xstring-copy)
+	  xstring->list
+	  xstring->vector
+	  xstring-append
+	  ;; xstring-ci<=?
+	  ;; xstring-ci<?
+	  ;; xstring-ci=?
+	  ;; xstring-ci>=?
+	  ;; xstring-ci>?
+	  xstring-copy
+	  xstring-copy!
+	  ;; xstring-downcase
+	  xstring-fill!
+	  ;; xstring-foldcase
+	  xstring-for-each
+	  xstring-length
+	  xstring-map
+	  xstring-ref
+	  xstring-set!
+	  ;; xstring-upcase
+	  ;; xstring<=?
+	  ;; xstring<?
+	  ;; xstring=?
+	  ;; xstring>=?
+	  ;; xstring>?
+	  xstring?))
+
 (define-package (runtime bytevector)
   (files "bytevector")
   (parent (runtime))
@@ -5747,11 +5800,7 @@ USA.
 	  wide-string-ref
 	  wide-string-set!
 	  wide-string?
-	  wide-substring)
-  (export (runtime generic-i/o-port)
-	  wide-string-contents)
-  (export (runtime input-port)
-	  wide-string-contents))
+	  wide-substring))
 
 (define-package (runtime uri)
   (files "url")
diff --git a/src/runtime/string.scm b/src/runtime/string.scm
index 30c3a4121..7bc8efd6b 100644
--- a/src/runtime/string.scm
+++ b/src/runtime/string.scm
@@ -1643,25 +1643,6 @@ USA.
 	    (outer k (fix:+ q 1)))))
     pi))
 
-(define (xstring? object)
-  (or (string? object)
-      (wide-string? object)))
-
-(define (xstring-length string)
-  (cond ((string? string) (string-length string))
-	((wide-string? string) (wide-string-length string))
-	(else (error:not-xstring string 'XSTRING-LENGTH))))
-
-(define (xstring-ref string index)
-  (cond ((string? string) (string-ref string index))
-	((wide-string? string) (wide-string-ref string index))
-	(else (error:not-xstring string 'XSTRING-REF))))
-
-(define (xstring-set! string index char)
-  (cond ((string? string) (string-set! string index char))
-	((wide-string? string) (wide-string-set! string index char))
-	(else (error:not-xstring string 'XSTRING-SET!))))
-
 (define (xstring-move! xstring1 xstring2 start2)
   (xsubstring-move! xstring1 0 (xstring-length xstring1) xstring2 start2))
 
@@ -1673,18 +1654,6 @@ USA.
 	 (substring-move-right! xstring1 start1 end1
 				xstring2 start2))))
 
-(define (xsubstring xstring start end)
-  (guarantee-xsubstring xstring start end 'XSUBSTRING)
-  (let ((string (make-string (- end start))))
-    (xsubstring-move! xstring start end string 0)
-    string))
-
-(define (xstring-fill! xstring char)
-  (cond ((string? xstring)
-	 (string-fill! xstring char))
-	(else
-	 (error:not-xstring xstring 'XSTRING-FILL!))))
-
 (define (xsubstring-fill! xstring start end char)
   (cond ((string? xstring)
 	 (substring-fill! xstring start end char))
diff --git a/src/runtime/xstring.scm b/src/runtime/xstring.scm
new file mode 100644
index 000000000..1c07c76d4
--- /dev/null
+++ b/src/runtime/xstring.scm
@@ -0,0 +1,353 @@
+#| -*-Scheme-*-
+
+Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994,
+    1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
+    2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016,
+    2017 Massachusetts Institute of Technology
+
+This file is part of MIT/GNU Scheme.
+
+MIT/GNU Scheme is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at
+your option) any later version.
+
+MIT/GNU Scheme is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with MIT/GNU Scheme; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301,
+USA.
+
+|#
+
+;;;; Extended strings
+;;; package: (runtime xstring)
+
+;;; This implementation supports all R7RS string operations in which all the
+;;; names have "string" replaced by "xstring".  This is a transitional
+;;; implementation to convert MIT/GNU Scheme to full Unicode string support.
+;;;
+;;; At some point in the future we'll renaming everything back to "string".
+
+;; get-output-xstring
+;; number->xstring
+;; open-output-xstring
+;; read-xstring
+;; symbol->xstring
+;; utf8->xstring
+;; vector->xstring
+;; write-xstring
+;; xstring->number
+;; xstring->symbol
+;; xstring->utf8
+
+(declare (usual-integrations))
+
+;;;; U32 vector
+
+(define-integrable (u32->byte-index index)
+  (fix:* index 4))
+
+(define-integrable (byte->u32-index index)
+  (fix:quotient index 4))
+
+(define (make-u32-vector length)
+  (make-bytevector (u32->byte-index length)))
+
+(define (u32-vector-length bytes)
+  (byte->u32-index (bytevector-length bytes)))
+
+(define (u32-vector-ref bytes index)
+  (bytevector-u32be-ref bytes (u32->byte-index index)))
+
+(define (u32-vector-set! bytes index u32)
+  (bytevector-u32be-set! bytes (u32->byte-index index) u32))
+
+(define (u32-vector-copy! to at from start end)
+  (bytevector-copy! to (u32->byte-index to)
+		    from (u32->byte-index start) (u32->byte-index end)))
+
+(define (u32-vector-fill! bytes start end u32)
+  (do ((i start (fix:+ i 1)))
+      ((not (fix:< i end)))
+    (u32-vector-set! bytes i u32)))
+
+;;;; UTF-32 string
+
+(define (make-utf32-string k #!optional char)
+  (let ((v (make-u32-vector k)))
+    (if (not (default-object? char))
+	(u32-vector-fill! v 0 k (char->integer char)))
+    (%make-utf32-string v)))
+
+(define (utf32-string . chars)
+  (list->utf32-string chars))
+
+(define (list->utf32-string chars)
+  (let ((v (make-u32-vector (length chars))))
+    (do ((chars chars (cdr chars))
+	 (i 0 (fix:+ i 1)))
+	((not (pair? chars)))
+      (u32-vector-set! v i (char->integer (car chars))))
+    (%make-utf32-string v)))
+
+(define-record-type <utf32-string>
+    (%make-utf32-string vector)
+    utf32-string?
+  (vector utf32-string-vector))
+
+(define (utf32-string-length string)
+  (u32-vector-length (utf32-string-vector string)))
+
+(define (utf32-string-ref string index)
+  (integer->char (u32-vector-ref (utf32-string-vector string) index)))
+
+(define (utf32-string-set! string index char)
+  (u32-vector-set! (utf32-string-vector string)
+		   index
+		   (char->integer char)))
+
+(define (utf32-string-copy string #!optional start end)
+  (let* ((end (get-end end (utf32-string-length string) 'utf32-string-copy))
+	 (start (get-start start end 'utf32-string-copy))
+	 (to (make-utf32-string (fix:- end start))))
+    (%utf32-string-copy! to 0 string start end)
+    to))
+
+(define (utf32-string-copy! to at from #!optional start end)
+  (let* ((end (get-end end (utf32-string-length from) 'utf32-string-copy!))
+	 (start (get-start start end 'utf32-string-copy!)))
+    (%utf32-string-copy! to at from start end)))
+
+(define-integrable (%utf32-string-copy! to at from start end)
+  (u32-vector-copy! (utf32-string-vector to) at
+		    (utf32-string-vector from) start end))
+
+(define (utf32-string-fill! string char #!optional start end)
+  (let* ((end (get-end end (utf32-string-length string) 'utf32-string-fill!))
+	 (start (get-start start end 'utf32-string-fill!)))
+    (u32-vector-fill! (utf32-string-vector string) start end
+		      (char->integer char))))
+
+(define (utf32-string->list string #!optional start end)
+  (let* ((end (get-end end (utf32-string-length string) 'utf32-string->list))
+	 (start (get-start start end 'utf32-string->list)))
+    (do ((i (fix:- end 1) (fix:- i 1))
+	 (chars '() (cons (utf32-string-ref string i) chars)))
+	((not (fix:>= i start)) chars))))
+
+(define (utf32-string->vector string #!optional start end)
+  (let* ((end (get-end end (utf32-string-length string) 'utf32-string->vector))
+	 (start (get-start start end 'utf32-string->vector))
+	 (v (make-vector (fix:- end start))))
+    (do ((i start (fix:+ i 1)))
+	((not (fix:< i end)))
+      (vector-set! v i (utf32-string-ref string i)))
+    v))
+
+(define (utf32-string-map proc string . strings)
+  (if (null? strings)
+      (let* ((n (utf32-string-length string))
+	     (result (make-utf32-string n)))
+	(do ((i 0 (fix:+ i 1)))
+	    ((not (fix:< i n)))
+	  (utf32-string-set! result i (proc (utf32-string-ref string i))))
+	result)
+      (let* ((n (min-length utf32-string-length string strings))
+	     (result (make-utf32-string n)))
+	(do ((i 0 (fix:+ i 1)))
+	    ((not (fix:< i n)))
+	  (utf32-string-set! result i
+			     (apply proc
+				    (utf32-string-ref string i)
+				    (map (lambda (string)
+					   (utf32-string-ref string i))
+					 strings))))
+	result)))
+
+(define (utf32-string-for-each procedure string . strings)
+  (if (null? strings)
+      (let ((n (utf32-string-length string)))
+	(do ((i 0 (fix:+ i 1)))
+	    ((not (fix:< i n)))
+	  (procedure (utf32-string-ref string i))))
+      (let ((n (min-length utf32-string-length string strings)))
+	(do ((i 0 (fix:+ i 1)))
+	    ((not (fix:< i n)))
+	  (apply procedure
+		 (utf32-string-ref string i)
+		 (map (lambda (string)
+			(utf32-string-ref string i))
+		      strings))))))
+
+;;;; String
+
+(define (xstring? object)
+  (or (legacy-string? object)
+      (utf32-string? object)))
+
+(define (create-xstring-registrations!)
+  (register-predicate! legacy-string? 'legacy-string)
+  (register-predicate! utf32-string? 'utf32-string)
+  (register-predicate! xstring? 'xstring
+		       '<= legacy-string?
+		       '<= utf32-string?))
+
+(define (xstring-append . strings)
+  (let ((string
+	 (make-utf32-string
+	  (do ((strings strings (cdr strings))
+	       (n 0 (fix:+ n (xstring-length (car strings)))))
+	      ((not (pair? strings)) n)))))
+    (let loop ((strings strings) (i 0))
+      (if (pair? strings)
+	  (let ((n (xstring-length (car strings))))
+	    (xstring-copy! string i (car strings) 0 n)
+	    (loop (cdr strings) (fix:+ i n)))))
+    string))
+
+(define (xstring-length string)
+  (cond ((legacy-string? string) (legacy-string-length string))
+	((utf32-string? string) (utf32-string-length string))
+	(else (error:not-a xstring? string 'xstring-length))))
+
+(define (xstring-ref string index)
+  (cond ((legacy-string? string) (legacy-string-ref string index))
+	((utf32-string? string) (utf32-string-ref string index))
+	(else (error:not-a xstring? string 'xstring-ref))))
+
+(define (xstring-set! string index char)
+  (cond ((legacy-string? string) (legacy-string-set! string index char))
+	((utf32-string? string) (utf32-string-set! string index char))
+	(else (error:not-a xstring? string 'xstring-set!))))
+
+(define (xstring-copy string #!optional start end)
+  (cond ((legacy-string? string) (legacy-string-copy string start end))
+	((utf32-string? string) (utf32-string-copy string start end))
+	(else (error:not-a xstring? string 'xstring-copy))))
+
+(define (xstring-copy! to at from #!optional start end)
+  (cond ((legacy-string? to)
+	 (cond ((legacy-string? from)
+		(legacy-string-copy! to at from start end))
+	       ((utf32-string? from)
+		(utf32->legacy-copy! to at from start end))
+	       (else
+		(error:not-a xstring? from 'xstring-copy!))))
+	((utf32-string? to)
+	 (cond ((legacy-string? from)
+		(legacy->utf32-copy! to at from start end))
+	       ((utf32-string? from)
+		(utf32-string-copy! to at from start end))
+	       (else
+		(error:not-a xstring? from 'xstring-copy!))))
+	(else
+	 (error:not-a xstring? to 'xstring-copy!))))
+
+(define (utf32->legacy-copy! to at from #!optional start end)
+  (let* ((end (get-end end (utf32-string-length from) 'xstring-copy!))
+	 (start (get-start start end 'xstring-copy!)))
+    (do ((i start (fix:+ i 1))
+	 (j at (fix:+ j 1)))
+	((not (fix:< i end)))
+      (legacy-string-set! to j (utf32-string-ref from i)))))
+
+(define (legacy->utf32-copy! to at from #!optional start end)
+  (let* ((end (get-end end (legacy-string-length from) 'xstring-copy!))
+	 (start (get-start start end 'xstring-copy!)))
+    (do ((i start (fix:+ i 1))
+	 (j at (fix:+ j 1)))
+	((not (fix:< i end)))
+      (utf32-string-set! to j (legacy-string-ref from i)))))
+
+(define (xstring-fill! string char #!optional start end)
+  (cond ((legacy-string? string) (legacy-string-fill! string char start end))
+	((utf32-string? string) (utf32-string-fill! string char start end))
+	(else (error:not-a xstring? string 'xstring-fill!))))
+
+(define (xstring->list string #!optional start end)
+  (cond ((legacy-string? string) (legacy-string->list string start end))
+	((utf32-string? string) (utf32-string->list string start end))
+	(else (error:not-a xstring? string 'xstring->list))))
+
+(define (xstring->vector string #!optional start end)
+  (cond ((legacy-string? string) (legacy-string->vector string start end))
+	((utf32-string? string) (utf32-string->vector string start end))
+	(else (error:not-a xstring? string 'xstring->vector))))
+
+(define (xstring-for-each procedure string . strings)
+  (if (null? strings)
+      (let ((n (xstring-length string)))
+	(do ((i 0 (fix:+ i 1)))
+	    ((not (fix:< i n)))
+	  (procedure (xstring-ref string i))))
+      (let ((n (min-length xstring-length string strings)))
+	(do ((i 0 (fix:+ i 1)))
+	    ((not (fix:< i n)))
+	  (apply procedure
+		 (xstring-ref string i)
+		 (map (lambda (string)
+			(xstring-ref string i))
+		      strings))))))
+
+(define (xstring-map proc string . strings)
+  (if (null? strings)
+      (let* ((n (xstring-length string))
+	     (result (make-utf32-string n)))
+	(do ((i 0 (fix:+ i 1)))
+	    ((not (fix:< i n)))
+	  (utf32-string-set! result i (proc (xstring-ref string i))))
+	result)
+      (let* ((n (min-length xstring-length string strings))
+	     (result (make-utf32-string n)))
+	(do ((i 0 (fix:+ i 1)))
+	    ((not (fix:< i n)))
+	  (utf32-string-set! result i
+			     (apply proc
+				    (xstring-ref string i)
+				    (map (lambda (string)
+					   (xstring-ref string i))
+					 strings))))
+	result)))
+
+;; (xstring-ci<=? string1 string2 . strings)
+;; (xstring-ci<? string1 string2 . strings)
+;; (xstring-ci=? string1 string2 . strings)
+;; (xstring-ci>=? string1 string2 . strings)
+;; (xstring-ci>? string1 string2 . strings)
+;; (xstring-downcase string)
+;; (xstring-foldcase string)
+;; (xstring-upcase string)
+;; (xstring<=? string1 string2 . strings)
+;; (xstring<? string1 string2 . strings)
+;; (xstring=? string1 string2 . strings)
+;; (xstring>=? string1 string2 . strings)
+;; (xstring>? string1 string2 . strings)
+
+(define (get-end end length caller)
+  (if (default-object? end)
+      length
+      (begin
+	(guarantee index-fixnum? end caller)
+	(if (not (fix:<= end length))
+	    (error:bad-range-argument end caller))
+	end)))
+
+(define (get-start start end caller)
+  (if (default-object? start)
+      0
+      (begin
+	(guarantee index-fixnum? start caller)
+	(if (not (fix:<= start end))
+	    (error:bad-range-argument start caller))
+	start)))
+
+(define (min-length string-length string strings)
+  (do ((strings strings (cdr strings))
+       (n (string-length string)
+	  (fix:min n (string-length (car strings)))))
+      ((null? strings) n)))
\ No newline at end of file
-- 
2.25.1