From c338d97cf8035172d1e76b441a3cae54d56bc069 Mon Sep 17 00:00:00 2001
From: Chris Hanson <org/chris-hanson/cph>
Date: Mon, 2 Dec 2019 16:05:48 -0800
Subject: [PATCH] Implement regexp replacement.

---
 src/runtime/runtime.pkg  |  12 ++--
 src/runtime/srfi-115.scm | 134 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 127 insertions(+), 19 deletions(-)

diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg
index c7bb35510..dcaa14929 100644
--- a/src/runtime/runtime.pkg
+++ b/src/runtime/runtime.pkg
@@ -5578,22 +5578,26 @@ USA.
   (files "srfi-115")
   (parent (runtime regexp))
   (export ()
-	  condition-type:compile-regexp
-	  print-regexp
+	  condition-type:compile-regexp	;extension
+	  print-regexp			;extension
 	  regexp
-	  regexp->nfa
+	  regexp->nfa			;extension
 	  regexp-match->list
 	  regexp-match-count
 	  regexp-match-keys
+	  regexp-match-replace		;extension
+	  regexp-match-replace-template? ;extension
 	  regexp-match-submatch
 	  regexp-match-submatch-end
 	  regexp-match-submatch-start
 	  regexp-match?
 	  regexp-matches
 	  regexp-matches?
+	  regexp-replace
+	  regexp-replace-all
 	  regexp-search
 	  regexp?
-	  valid-cset-sre?
+	  valid-cset-sre?		;extension
 	  valid-sre?))
 
 (define-package (runtime regexp rules)
diff --git a/src/runtime/srfi-115.scm b/src/runtime/srfi-115.scm
index 719570485..1a2fbe614 100644
--- a/src/runtime/srfi-115.scm
+++ b/src/runtime/srfi-115.scm
@@ -97,7 +97,12 @@ USA.
   (guarantee nfc-string? string 'regexp-matches?)
   (let* ((end (fix:end-index end (string-length string) 'regexp-matches?))
 	 (start (fix:start-index start end 'regexp-matches?)))
-    (and (run-matcher (regexp-impl (regexp re)) string start end)
+    (and (run-matcher (regexp-impl
+		       (if (regexp? re)
+			   re
+			   ;; Disable captures to speed up match.
+			   (compile-sre-top-level `(w/nocapture ,re))))
+		      string start end)
 	 #t)))
 
 (define (regexp-matches re string #!optional start end)
@@ -106,21 +111,23 @@ USA.
 	 (start (fix:start-index start end 'regexp-matches)))
     (%regexp-match (regexp re) string start end)))
 
+(define (%regexp-match regexp string start end)
+  (let ((groups (run-matcher (regexp-impl regexp) string start end)))
+    (and groups
+	 (make-regexp-match (car groups) (cdr groups)))))
+
 (define (regexp-search re string #!optional start end)
   (guarantee nfc-string? string 'regexp-search)
   (let* ((end (fix:end-index end (string-length string) 'regexp-search))
 	 (start (fix:start-index start end 'regexp-search)))
-    (let ((regexp (regexp re)))
-      (let loop ((index start))
-	(if (fix:< index end)
-	    (or (%regexp-match regexp string index end)
-		(loop (fix:+ index 1)))
-	    (%regexp-match regexp string index end))))))
+    (%regexp-search (regexp re) string start end)))
 
-(define (%regexp-match regexp string start end)
-  (let ((groups (run-matcher (regexp-impl regexp) string start end)))
-    (and groups
-	 (make-regexp-match (car groups) (cdr groups)))))
+(define (%regexp-search regexp string start end)
+  (let loop ((index start))
+    (if (fix:< index end)
+	(or (%regexp-match regexp string index end)
+	    (loop (fix:+ index 1)))
+	(%regexp-match regexp string index end))))
 
 (define-record-type <regexp-match>
     (make-regexp-match group0 groups)
@@ -164,6 +171,107 @@ USA.
   (cons (group-key (%regexp-match-group0 match))
 	(map group-key (%regexp-match-groups match))))
 
+(define (regexp-replace re string subst #!optional start end count)
+  (guarantee regexp-replace-subst? subst 'regexp-replace)
+  (let* ((len (string-length string))
+	 (end (if end (fix:end-index end len 'regexp-replace) len))
+	 (start (fix:start-index start end 'regexp-replace))
+	 (regexp (regexp re))
+	 (count
+	  (if (default-object? count)
+	      0
+	      (guarantee exact-nonnegative-integer? count 'regexp-replace))))
+
+    (define (find-match index n)
+      (let ((match (%regexp-search regexp string index end)))
+	(if match
+	    (if (< n count)
+		(find-match (regexp-match-submatch-start match 0)
+			    (- n 1))
+		(string-append (subst-match 'pre match string start end)
+			       (subst-match subst match string start end)
+			       (subst-match 'post match string start end)))
+	    (substring string start end))))
+
+    (find-match start 0)))
+
+(define (regexp-replace-all re string subst #!optional start end)
+  (guarantee regexp-replace-subst? subst 'regexp-replace-all)
+  (let* ((len (string-length string))
+	 (end (if end (fix:end-index end len 'regexp-replace-all) len))
+	 (start (fix:start-index start end 'regexp-replace-all))
+	 (regexp (regexp re)))
+
+    (define (find-matches index)
+      (let ((match (%regexp-search regexp string index end)))
+	(if match
+	    (cons match
+		  (find-matches (regexp-match-submatch-start match 0)))
+	    '())))
+
+    (define (subst-matches matches start)
+      (if (pair? matches)
+	  (let ((match (car matches))
+		(matches (cdr matches)))
+	    (cons* (subst-match 'pre match string start end)
+		   (subst-match subst match string start
+				(if (pair? matches)
+				    (regexp-match-submatch-start (car matches)
+								 0)
+				    end))
+		   (subst-matches matches (regexp-match-submatch-end match 0))))
+	  '()))
+
+    (let ((matches (find-matches start)))
+      (if (pair? matches)
+	  (string-append* (subst-matches matches start))
+	  (substring string start end)))))
+
+(define (regexp-replace-subst? object)
+  (or (string? object)
+      (regexp-match-key? object)
+      (eq? 'pre object)
+      (eq? 'post object)))
+(register-predicate! regexp-replace-subst? 'regexp-replace-subst)
+
+(define (subst-match match subst string start end)
+  (cond ((string? subst)
+	 subst)
+	((eq? 'pre subst)
+	 (string-slice string start (regexp-match-submatch-start match 0)))
+	((eq? 'post subst)
+	 (string-slice starting (regexp-match-submatch-end match 0) end))
+	(else
+	 (or (regexp-match-submatch match subst) ""))))
+
+(define (regexp-match-key? object)
+  (or (exact-nonnegative-integer? object)
+      (interned-symbol? object)))
+(register-predicate! regexp-match-key? 'regexp-match-key)
+
+(define (regexp-match-replace-template? object)
+  (or (string? object)
+      (regexp-match-key? object)
+      (and (list? object)
+	   (every subst-template? object))))
+(register-predicate! regexp-match-replace-template?
+		     'regexp-match-replace-template)
+
+(define (regexp-match-replace match template)
+  (guarantee regexp-match? match 'regexp-match-replace)
+  (let ((builder (string-builder)))
+    (let loop ((template template))
+      (cond ((string? template)
+	     (builder template))
+	    ((regexp-match-key? template)
+	     (builder (or (regexp-match-submatch match template) "")))
+	    ((list? template)
+	     (for-each loop template))
+	    (else
+	     (error:not-a regexp-match-replace-template? template
+			  'regexp-match-replace))))
+    (builder)))
+
 ;;;; Compiler rules
 
 (define sre-rules)
@@ -271,10 +379,6 @@ USA.
 
 (define (max-arity? object)
   (exact-nonnegative-integer? object))
-
-(define (backref-key? object)
-  (or (exact-positive-integer? object)
-      (interned-symbol? object)))
 
 ;;;; <sre>
 
-- 
2.25.1