From 486900d9769b0a13ef94bea37efbba50497affee Mon Sep 17 00:00:00 2001
From: Chris Hanson <org/chris-hanson/cph>
Date: Thu, 16 Feb 2017 22:27:03 -0800
Subject: [PATCH] Reorganize ustring around operations.

---
 src/runtime/ustring.scm | 446 ++++++++++++++++++++--------------------
 1 file changed, 221 insertions(+), 225 deletions(-)

diff --git a/src/runtime/ustring.scm b/src/runtime/ustring.scm
index 9000b7279..b5566390e 100644
--- a/src/runtime/ustring.scm
+++ b/src/runtime/ustring.scm
@@ -40,6 +40,13 @@ USA.
 ;;; everything to "string".
 
 (declare (usual-integrations))
+
+(define-primitives
+  (legacy-string-length string-length 1)
+  (legacy-string-ref string-ref 2)
+  (legacy-string-set! string-set! 3)
+  (legacy-string? string? 1)
+  (make-legacy-string string-allocate 1))
 
 ;;;; Utilities
 
@@ -108,13 +115,11 @@ USA.
       ((not (fix:< i end)))
     (u32-vector-set! bytes i u32)))
 
-;;;; UTF-32 strings
+;;;; String
 
-(define (make-utf32-string k #!optional char)
-  (let ((v (make-u32-vector k)))
-    (if (not (default-object? char))
-	(u32-vector-fill! v 0 k (char->integer char)))
-    (%record %utf32-string-tag v)))
+(define (ustring? object)
+  (or (legacy-string? object)
+      (utf32-string? object)))
 
 (define (utf32-string? object)
   (and (%record? object)
@@ -124,224 +129,6 @@ USA.
 (define %utf32-string-tag
   '|#[(runtime ustring)utf32-string]|)
 
-(define (utf32-string-vector string caller)
-  (guarantee utf32-string? string caller)
-  (%record-ref string 1))
-
-(define-integrable (utf32-end-index end string caller)
-  (fix:end-index end (utf32-string-length string) caller))
-
-(define (utf32-string-length string)
-  (u32-vector-length (utf32-string-vector string 'utf32-string-length)))
-
-(define (utf32-string-ref string index)
-  (integer->char
-   (u32-vector-ref (utf32-string-vector string 'utf32-string-ref) index)))
-
-(define (utf32-string-set! string index char)
-  (u32-vector-set! (utf32-string-vector string 'utf32-string-set!)
-		   index
-		   (char->integer char)))
-
-(define (utf32-string-copy string #!optional start end)
-  (let* ((end (utf32-end-index end string 'utf32-string-copy))
-	 (start (fix:start-index start end 'utf32-string-copy)))
-    (%utf32-string-copy string start end)))
-
-(define (%utf32-string-copy string start end)
-  (let ((to (make-utf32-string (fix:- end start))))
-    (%utf32-string-copy! to 0 string start end utf32-string-copy)
-    to))
-
-(define (utf32-string-copy! to at from #!optional start end)
-  (let* ((end (utf32-end-index end from 'utf32-string-copy!))
-	 (start (fix:start-index start end 'utf32-string-copy!)))
-    (%utf32-string-copy! to at from start end 'utf32-string-copy!)))
-
-(define-integrable (%utf32-string-copy! to at from start end caller)
-  (u32-vector-copy! (utf32-string-vector to caller) at
-		    (utf32-string-vector from caller) start end))
-
-(define (utf32-string-fill! string char #!optional start end)
-  (let* ((end (utf32-end-index end string 'utf32-string-fill!))
-	 (start (fix:start-index start end 'utf32-string-fill!)))
-    (u32-vector-fill! (utf32-string-vector string 'utf32-string-fill!)
-		      start
-		      end
-		      (char->integer char))))
-
-(define (utf32-string->list string #!optional start end)
-  (let* ((end (utf32-end-index end string 'utf32-string->list))
-	 (start (fix:start-index start end 'utf32-string->list)))
-    (do ((i (fix:- end 1) (fix:- i 1))
-	 (chars '() (cons (utf32-string-ref string i) chars)))
-	((not (fix:>= i start)) chars))))
-
-(define utf32-string->vector
-  (x-copy-maker utf32-string-length utf32-string-ref make-vector vector-set!
-		'utf32-string->vector))
-
-(define (utf32-string-find-first-index proc string #!optional start end)
-  (let* ((caller 'utf32-string-find-next-index)
-	 (end (utf32-end-index end string caller))
-	 (start (fix:start-index start end caller)))
-    (let loop ((i start))
-      (and (fix:< i end)
-	   (if (proc (utf32-string-ref string i))
-	       i
-	       (loop (fix:+ i 1)))))))
-
-(define (utf32-string-find-last-index proc string #!optional start end)
-  (let* ((caller 'utf32-string-find-last-index)
-	 (end (utf32-end-index end string caller))
-	 (start (fix:start-index start end caller)))
-    (let loop ((i (fix:- end 1)))
-      (and (fix:>= i start)
-	   (if (proc (utf32-string-ref string i))
-	       i
-	       (loop (fix:- i 1)))))))
-
-(define (utf32-string-map proc string . strings)
-  (if (null? strings)
-      (let* ((n (utf32-string-length string))
-	     (result (make-utf32-string n)))
-	(do ((i 0 (fix:+ i 1)))
-	    ((not (fix:< i n)))
-	  (utf32-string-set! result i (proc (utf32-string-ref string i))))
-	result)
-      (let* ((n (min-length utf32-string-length string strings))
-	     (result (make-utf32-string n)))
-	(do ((i 0 (fix:+ i 1)))
-	    ((not (fix:< i n)))
-	  (utf32-string-set! result i
-			     (apply proc
-				    (utf32-string-ref string i)
-				    (map (lambda (string)
-					   (utf32-string-ref string i))
-					 strings))))
-	result)))
-
-(define (utf32-string-for-each procedure string . strings)
-  (if (null? strings)
-      (let ((n (utf32-string-length string)))
-	(do ((i 0 (fix:+ i 1)))
-	    ((not (fix:< i n)))
-	  (procedure (utf32-string-ref string i))))
-      (let ((n (min-length utf32-string-length string strings)))
-	(do ((i 0 (fix:+ i 1)))
-	    ((not (fix:< i n)))
-	  (apply procedure
-		 (utf32-string-ref string i)
-		 (map (lambda (string)
-			(utf32-string-ref string i))
-		      strings))))))
-
-(define (utf32-string-downcase string)
-  (utf32-case-transform string char-downcase-full))
-
-(define (utf32-string-foldcase string)
-  (utf32-case-transform string char-foldcase-full))
-
-(define (utf32-string-upcase string)
-  (utf32-case-transform string char-upcase-full))
-
-(define (utf32-case-transform string transform)
-  (let ((chars
-	 (append-map transform
-		     (utf32-string->list string))))
-    (let ((n (length chars)))
-      (let ((result (make-utf32-string n)))
-	(do ((chars chars (cdr chars))
-	     (i 0 (fix:+ i 1)))
-	    ((not (pair? chars)))
-	  (utf32-string-set! result i (car chars)))
-	result))))
-
-;;;; Legacy strings
-
-(define-primitives
-  (legacy-string-length string-length 1)
-  (legacy-string-ref string-ref 2)
-  (legacy-string-set! string-set! 3)
-  (legacy-string? string? 1)
-  (make-legacy-string string-allocate 1))
-
-(define (legacy-string-fill! string char #!optional start end)
-  (let* ((end (fix:end-index end (legacy-string-length string) 'string-fill!))
-	 (start (fix:start-index start end 'string-fill!)))
-    (do ((index start (fix:+ index 1)))
-	((not (fix:< index end)) unspecific)
-      (legacy-string-set! string index char))))
-
-(define legacy-string-copy
-  (x-copy-maker legacy-string-length legacy-string-ref make-legacy-string
-		legacy-string-set! 'string-copy))
-
-(define legacy-string-copy!
-  (x-copy!-maker legacy-string-length legacy-string-ref legacy-string-set!
-		 'string-copy!))
-
-(define (legacy-string->list string #!optional start end)
-  (let* ((end (fix:end-index end (legacy-string-length string) 'string->list))
-	 (start (fix:start-index start end 'string->list)))
-    (let loop ((index (fix:- end 1)) (chars '()))
-      (if (fix:<= start index)
-	  (loop (fix:- index 1) (cons (legacy-string-ref string index) chars))
-	  chars))))
-
-(define legacy-string->vector
-  (x-copy-maker legacy-string-length legacy-string-ref make-vector vector-set!
-		'string->vector))
-
-(define (legacy-string-find-first-index proc string #!optional start end)
-  (let* ((caller 'legacy-string-find-next-index)
-	 (end (fix:end-index end (legacy-string-length string) caller))
-	 (start (fix:start-index start end caller)))
-    (let loop ((i start))
-      (and (fix:< i end)
-	   (if (proc (legacy-string-ref string i))
-	       i
-	       (loop (fix:+ i 1)))))))
-
-(define (legacy-string-find-last-index proc string #!optional start end)
-  (let* ((caller 'legacy-string-find-last-index)
-	 (end (fix:end-index end (legacy-string-length string) caller))
-	 (start (fix:start-index start end caller)))
-    (let loop ((i (fix:- end 1)))
-      (and (fix:>= i start)
-	   (if (proc (legacy-string-ref string i))
-	       i
-	       (loop (fix:- i 1)))))))
-
-(define (legacy-string-downcase string)
-  (let ((end (legacy-string-length string)))
-    (let ((string* (make-legacy-string end)))
-      (do ((i 0 (fix:+ i 1)))
-	  ((fix:= i end))
-	(legacy-string-set! string* i
-			    (char-downcase (legacy-string-ref string i))))
-      string*)))
-
-(define (legacy-string-upcase string)
-  (let ((end (legacy-string-length string)))
-    (let ((string* (make-legacy-string end)))
-      (do ((i 0 (fix:+ i 1)))
-	  ((fix:= i end))
-	(legacy-string-set! string* i
-			    (char-upcase (legacy-string-ref string i))))
-      string*)))
-
-(define (legacy-string-hash key #!optional modulus)
-  (if (default-object? modulus)
-      ((ucode-primitive string-hash) key)
-      ((ucode-primitive string-hash-mod) key modulus)))
-
-;;;; String
-
-(define (ustring? object)
-  (or (legacy-string? object)
-      (utf32-string? object)))
-
 (define (register-ustring-predicates!)
   (register-predicate! legacy-string? 'legacy-string)
   (register-predicate! utf32-string? 'utf32-string)
@@ -356,16 +143,38 @@ USA.
       (make-utf32-string k char)
       (make-legacy-string 0)))
 
+(define (make-utf32-string k #!optional char)
+  (let ((v (make-u32-vector k)))
+    (if (not (default-object? char))
+	(u32-vector-fill! v 0 k (char->integer char)))
+    (%record %utf32-string-tag v)))
+
+(define (utf32-string-vector string caller)
+  (guarantee utf32-string? string caller)
+  (%record-ref string 1))
+
 (define (ustring-length string)
   (cond ((legacy-string? string) (legacy-string-length string))
 	((utf32-string? string) (utf32-string-length string))
 	(else (error:not-a ustring? string 'ustring-length))))
 
+(define (utf32-string-length string)
+  (u32-vector-length (utf32-string-vector string 'utf32-string-length)))
+
 (define (ustring-ref string index)
   (cond ((legacy-string? string) (legacy-string-ref string index))
 	((utf32-string? string) (utf32-string-ref string index))
 	(else (error:not-a ustring? string 'ustring-ref))))
 
+(define (utf32-string-ref string index)
+  (integer->char
+   (u32-vector-ref (utf32-string-vector string 'utf32-string-ref) index)))
+
+(define (utf32-string-set! string index char)
+  (u32-vector-set! (utf32-string-vector string 'utf32-string-set!)
+		   index
+		   (char->integer char)))
+
 (define (ustring-set! string index char)
   (cond ((legacy-string? string) (legacy-string-set! string index char))
 	((utf32-string? string) (utf32-string-set! string index char))
@@ -443,6 +252,20 @@ USA.
 	  (else
 	   (error:not-a ustring? string 'ustring-copy)))))
 
+(define legacy-string-copy
+  (x-copy-maker legacy-string-length legacy-string-ref make-legacy-string
+		legacy-string-set! 'string-copy))
+
+(define (utf32-string-copy string #!optional start end)
+  (let* ((end (utf32-end-index end string 'utf32-string-copy))
+	 (start (fix:start-index start end 'utf32-string-copy)))
+    (%utf32-string-copy string start end)))
+
+(define (%utf32-string-copy string start end)
+  (let ((to (make-utf32-string (fix:- end start))))
+    (%utf32-string-copy! to 0 string start end utf32-string-copy)
+    to))
+
 (define (ustring-copy! to at from #!optional start end)
   (cond ((legacy-string? to)
 	 (cond ((legacy-string? from)
@@ -461,6 +284,10 @@ USA.
 	(else
 	 (error:not-a ustring? to 'ustring-copy!))))
 
+(define legacy-string-copy!
+  (x-copy!-maker legacy-string-length legacy-string-ref legacy-string-set!
+		 'string-copy!))
+
 (define utf32->legacy-copy!
   (x-copy!-maker utf32-string-length utf32-string-ref legacy-string-set!
 		 'ustring-copy!))
@@ -469,11 +296,35 @@ USA.
   (x-copy!-maker legacy-string-length legacy-string-ref utf32-string-set!
 		 'legacy->utf32-copy!))
 
+(define (utf32-string-copy! to at from #!optional start end)
+  (let* ((end (utf32-end-index end from 'utf32-string-copy!))
+	 (start (fix:start-index start end 'utf32-string-copy!)))
+    (%utf32-string-copy! to at from start end 'utf32-string-copy!)))
+
+(define-integrable (%utf32-string-copy! to at from start end caller)
+  (u32-vector-copy! (utf32-string-vector to caller) at
+		    (utf32-string-vector from caller) start end))
+
 (define (ustring-fill! string char #!optional start end)
   (cond ((legacy-string? string) (legacy-string-fill! string char start end))
 	((utf32-string? string) (utf32-string-fill! string char start end))
 	(else (error:not-a ustring? string 'ustring-fill!))))
-
+
+(define (legacy-string-fill! string char #!optional start end)
+  (let* ((end (fix:end-index end (legacy-string-length string) 'string-fill!))
+	 (start (fix:start-index start end 'string-fill!)))
+    (do ((index start (fix:+ index 1)))
+	((not (fix:< index end)) unspecific)
+      (legacy-string-set! string index char))))
+
+(define (utf32-string-fill! string char #!optional start end)
+  (let* ((end (utf32-end-index end string 'utf32-string-fill!))
+	 (start (fix:start-index start end 'utf32-string-fill!)))
+    (u32-vector-fill! (utf32-string-vector string 'utf32-string-fill!)
+		      start
+		      end
+		      (char->integer char))))
+
 (define (%ustring=? string1 string2)
   (and (fix:= (ustring-length string1) (ustring-length string2))
        (ustring-every char=? string1 string2)))
@@ -569,10 +420,33 @@ USA.
 	((utf32-string? string) (utf32-string->list string start end))
 	(else (error:not-a ustring? string 'ustring->list))))
 
+(define (utf32-string->list string #!optional start end)
+  (let* ((end (utf32-end-index end string 'utf32-string->list))
+	 (start (fix:start-index start end 'utf32-string->list)))
+    (do ((i (fix:- end 1) (fix:- i 1))
+	 (chars '() (cons (utf32-string-ref string i) chars)))
+	((not (fix:>= i start)) chars))))
+
+(define (legacy-string->list string #!optional start end)
+  (let* ((end (fix:end-index end (legacy-string-length string) 'string->list))
+	 (start (fix:start-index start end 'string->list)))
+    (let loop ((index (fix:- end 1)) (chars '()))
+      (if (fix:<= start index)
+	  (loop (fix:- index 1) (cons (legacy-string-ref string index) chars))
+	  chars))))
+
 (define (ustring->vector string #!optional start end)
   (cond ((legacy-string? string) (legacy-string->vector string start end))
 	((utf32-string? string) (utf32-string->vector string start end))
 	(else (error:not-a ustring? string 'ustring->vector))))
+
+(define legacy-string->vector
+  (x-copy-maker legacy-string-length legacy-string-ref make-vector vector-set!
+		'string->vector))
+
+(define utf32-string->vector
+  (x-copy-maker utf32-string-length utf32-string-ref make-vector vector-set!
+		'utf32-string->vector))
 
 (define (ustring-for-each proc string . strings)
   (if (null? strings)
@@ -589,6 +463,21 @@ USA.
 			(ustring-ref string i))
 		      strings))))))
 
+(define (utf32-string-for-each procedure string . strings)
+  (if (null? strings)
+      (let ((n (utf32-string-length string)))
+	(do ((i 0 (fix:+ i 1)))
+	    ((not (fix:< i n)))
+	  (procedure (utf32-string-ref string i))))
+      (let ((n (min-length utf32-string-length string strings)))
+	(do ((i 0 (fix:+ i 1)))
+	    ((not (fix:< i n)))
+	  (apply procedure
+		 (utf32-string-ref string i)
+		 (map (lambda (string)
+			(utf32-string-ref string i))
+		      strings))))))
+
 (define (ustring-map proc string . strings)
   (if (null? strings)
       (let* ((n (ustring-length string))
@@ -608,6 +497,26 @@ USA.
 					   (ustring-ref string i))
 					 strings))))
 	result)))
+
+(define (utf32-string-map proc string . strings)
+  (if (null? strings)
+      (let* ((n (utf32-string-length string))
+	     (result (make-utf32-string n)))
+	(do ((i 0 (fix:+ i 1)))
+	    ((not (fix:< i n)))
+	  (utf32-string-set! result i (proc (utf32-string-ref string i))))
+	result)
+      (let* ((n (min-length utf32-string-length string strings))
+	     (result (make-utf32-string n)))
+	(do ((i 0 (fix:+ i 1)))
+	    ((not (fix:< i n)))
+	  (utf32-string-set! result i
+			     (apply proc
+				    (utf32-string-ref string i)
+				    (map (lambda (string)
+					   (utf32-string-ref string i))
+					 strings))))
+	result)))
 
 (define (ustring-any proc string . strings)
   (cond ((null? strings)
@@ -677,6 +586,26 @@ USA.
 	(else
 	 (error:not-a ustring? string 'ustring-find-first-index))))
 
+(define (legacy-string-find-first-index proc string #!optional start end)
+  (let* ((caller 'legacy-string-find-next-index)
+	 (end (fix:end-index end (legacy-string-length string) caller))
+	 (start (fix:start-index start end caller)))
+    (let loop ((i start))
+      (and (fix:< i end)
+	   (if (proc (legacy-string-ref string i))
+	       i
+	       (loop (fix:+ i 1)))))))
+
+(define (utf32-string-find-first-index proc string #!optional start end)
+  (let* ((caller 'utf32-string-find-next-index)
+	 (end (utf32-end-index end string caller))
+	 (start (fix:start-index start end caller)))
+    (let loop ((i start))
+      (and (fix:< i end)
+	   (if (proc (utf32-string-ref string i))
+	       i
+	       (loop (fix:+ i 1)))))))
+
 (define (ustring-find-last-index proc string #!optional start end)
   (cond ((legacy-string? string)
 	 (legacy-string-find-last-index proc string start end))
@@ -685,6 +614,26 @@ USA.
 	(else
 	 (error:not-a ustring? string 'ustring-find-last-index))))
 
+(define (legacy-string-find-last-index proc string #!optional start end)
+  (let* ((caller 'legacy-string-find-last-index)
+	 (end (fix:end-index end (legacy-string-length string) caller))
+	 (start (fix:start-index start end caller)))
+    (let loop ((i (fix:- end 1)))
+      (and (fix:>= i start)
+	   (if (proc (legacy-string-ref string i))
+	       i
+	       (loop (fix:- i 1)))))))
+
+(define (utf32-string-find-last-index proc string #!optional start end)
+  (let* ((caller 'utf32-string-find-last-index)
+	 (end (utf32-end-index end string caller))
+	 (start (fix:start-index start end caller)))
+    (let loop ((i (fix:- end 1)))
+      (and (fix:>= i start)
+	   (if (proc (utf32-string-ref string i))
+	       i
+	       (loop (fix:- i 1)))))))
+
 (define (ustring-find-first-char string char #!optional start end)
   (ustring-find-first-index (char=-predicate char) string start end))
 
@@ -702,19 +651,63 @@ USA.
 	((utf32-string? string) (utf32-string-downcase string))
 	(else (error:not-a ustring? string 'ustring-downcase))))
 
+(define (legacy-string-downcase string)
+  (let ((end (legacy-string-length string)))
+    (let ((string* (make-legacy-string end)))
+      (do ((i 0 (fix:+ i 1)))
+	  ((fix:= i end))
+	(legacy-string-set! string* i
+			    (char-downcase (legacy-string-ref string i))))
+      string*)))
+
+(define (utf32-string-downcase string)
+  (utf32-case-transform string char-downcase-full))
+
 (define (ustring-foldcase string)
   (cond ((legacy-string? string) (legacy-string-downcase string))
 	((utf32-string? string) (utf32-string-foldcase string))
 	(else (error:not-a ustring? string 'ustring-foldcase))))
 
+(define (utf32-string-foldcase string)
+  (utf32-case-transform string char-foldcase-full))
+
 (define (ustring-upcase string)
   (cond ((legacy-string? string) (legacy-string-upcase string))
 	((utf32-string? string) (utf32-string-upcase string))
 	(else (error:not-a ustring? string 'ustring-upcase))))
 
+(define (utf32-string-upcase string)
+  (utf32-case-transform string char-upcase-full))
+
+(define (legacy-string-upcase string)
+  (let ((end (legacy-string-length string)))
+    (let ((string* (make-legacy-string end)))
+      (do ((i 0 (fix:+ i 1)))
+	  ((fix:= i end))
+	(legacy-string-set! string* i
+			    (char-upcase (legacy-string-ref string i))))
+      string*)))
+
+(define (utf32-case-transform string transform)
+  (let ((chars
+	 (append-map transform
+		     (utf32-string->list string))))
+    (let ((n (length chars)))
+      (let ((result (make-utf32-string n)))
+	(do ((chars chars (cdr chars))
+	     (i 0 (fix:+ i 1)))
+	    ((not (pair? chars)))
+	  (utf32-string-set! result i (car chars)))
+	result))))
+
 (define (ustring-hash string #!optional modulus)
   (legacy-string-hash (string-for-primitive string) modulus))
 
+(define (legacy-string-hash key #!optional modulus)
+  (if (default-object? modulus)
+      ((ucode-primitive string-hash) key)
+      ((ucode-primitive string-hash-mod) key modulus)))
+
 (define (ustring . objects)
   (%ustring* objects 'ustring))
 
@@ -747,6 +740,9 @@ USA.
 	(number? object)
 	(uri? object)))
 
+(define-integrable (utf32-end-index end string caller)
+  (fix:end-index end (utf32-string-length string) caller))
+
 (define (string-for-primitive string)
   (cond ((legacy-string? string)
 	 (let ((end (legacy-string-length string)))
-- 
2.25.1