From: Chris Hanson <org/chris-hanson/cph> Date: Sun, 19 Feb 2017 21:03:34 +0000 (-0800) Subject: Implement string-builder. X-Git-Tag: mit-scheme-pucked-9.2.12~220^2~60 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=418f64fcc7603cb727c0ec7915be55fcf5f896d9;p=mit-scheme.git Implement string-builder. This hides most of the details of building strings, and continues to work even if we add immutable strings. --- diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index 8918a406e..61c957d93 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -1127,6 +1127,7 @@ USA. string-any string-append string-append* + string-builder string-ci-hash string-ci<=? string-ci<? diff --git a/src/runtime/ustring.scm b/src/runtime/ustring.scm index a15d4c39d..2e038ef97 100644 --- a/src/runtime/ustring.scm +++ b/src/runtime/ustring.scm @@ -211,28 +211,80 @@ USA. start (fix:- end start)))))) +(define (string-builder) + ;; This is optimized to minimize copying, so it wastes some space. + (let ((buffer-size 16)) + (let ((buffers '()) + (buffer (full-string-allocate buffer-size)) + (index 0)) + + (define (new-buffer!) + (set! buffers (cons (string-slice buffer 0 index) buffers)) + (set! buffer (full-string-allocate buffer-size)) + (set! index 0) + unspecific) + + (define (append-char! char) + (if (not (fix:< index buffer-size)) + (new-buffer!)) + (string-set! buffer index char) + (set! index (fix:+ index 1)) + unspecific) + + (define (append-string! string) + (if (fix:> index 0) + (new-buffer!)) + (set! buffers (cons string buffers)) + unspecific) + + (define (build) + (let ((strings (reverse! (cons (string-slice buffer 0 index) buffers)))) + (set! buffer) + (set! buffers) + (set! index) + (let ((result + (do ((strings strings (cdr strings)) + (n 0 (fix:+ n (string-length (car strings)))) + (8-bit? #t (and 8-bit? (string-8-bit? (car strings))))) + ((not (pair? strings)) + (if 8-bit? + (legacy-string-allocate n) + (full-string-allocate n)))))) + (do ((strings strings (cdr strings)) + (i 0 (string-copy! result i (car strings)))) + ((not (pair? strings)))) + result))) + + (lambda (#!optional object) + (cond ((default-object? object) (build)) + ((bitless-char? object) (append-char! object)) + ((string? object) (append-string! object)) + (else (error "Not a char or string:" object))))))) + (define (string-copy! to at from #!optional start end) (let* ((end (fix:end-index end (string-length from) 'string-copy!)) (start (fix:start-index start end 'string-copy!))) (guarantee index-fixnum? at 'string-copy!) - (if (not (fix:<= (fix:+ at (fix:- end start)) (string-length to))) - (error:bad-range-argument to 'string-copy!)) - (receive (to at) - (if (slice? to) - (values (slice-string to) - (fix:+ (slice-start to) at)) - (values to at)) - (receive (from start end) (translate-slice from start end) - (if (legacy-string? to) - (if (legacy-string? from) - (copy-loop legacy-string-set! to at - legacy-string-ref from start end) - (copy-loop legacy-string-set! to at - %full-string-ref from start end)) - (if (legacy-string? from) - (copy-loop %full-string-set! to at - legacy-string-ref from start end) - (%full-string-copy! to at from start end))))))) + (let ((final-at (fix:+ at (fix:- end start)))) + (if (not (fix:<= final-at (string-length to))) + (error:bad-range-argument to 'string-copy!)) + (receive (to at) + (if (slice? to) + (values (slice-string to) + (fix:+ (slice-start to) at)) + (values to at)) + (receive (from start end) (translate-slice from start end) + (if (legacy-string? to) + (if (legacy-string? from) + (copy-loop legacy-string-set! to at + legacy-string-ref from start end) + (copy-loop legacy-string-set! to at + %full-string-ref from start end)) + (if (legacy-string? from) + (copy-loop %full-string-set! to at + legacy-string-ref from start end) + (%full-string-copy! to at from start end))))) + final-at))) (define-integrable (%full-string-copy! to at from start end) (cp-vector-copy! (%full-string-cp-vector to) at diff --git a/tests/runtime/test-string.scm b/tests/runtime/test-string.scm index 597def903..ca736b80d 100644 --- a/tests/runtime/test-string.scm +++ b/tests/runtime/test-string.scm @@ -181,4 +181,71 @@ USA. (assert-string-ci= "Strasse" "Stra\xDF;e") (assert-string-ci= "STRASSE" "Stra\xDF;e") (assert-string-ci= "\x3BE;\x3B1;\x3BF;\x3C2;" "\x39E;\x391;\x39F;\x3A3;") - (assert-string-ci= "\x3BE;\x3B1;\x3BF;\x3C3;" "\x39E;\x391;\x39F;\x3A3;"))) \ No newline at end of file + (assert-string-ci= "\x3BE;\x3B1;\x3BF;\x3C3;" "\x39E;\x391;\x39F;\x3A3;"))) + +(define-test 'string-builder + (lambda () + (let ((end (length latin-alphabet))) + (do ((i 0 (fix:+ i 1))) + ((not (fix:< i end))) + (let ((chars (list-head latin-alphabet i))) + (let ((result (build-string chars))) + (assert-true (legacy-string? result)) + (assert-string= result (chars->string chars)))) + (let ((strings (make-test-strings i latin-alphabet #f))) + (let ((result (build-string strings))) + (assert-true (legacy-string? result)) + (assert-string= result (string-append* strings)))) + (let ((strings (make-test-strings i latin-alphabet #t))) + (let ((result (build-string strings))) + (assert-true (legacy-string? result)) + (assert-string= result (string-append* strings)))))) + (let ((end (length greek-alphabet))) + (do ((i 0 (fix:+ i 1))) + ((not (fix:< i end))) + (let ((chars (list-head greek-alphabet i))) + (assert-string= (build-string chars) + (chars->string chars))) + (let ((strings (make-test-strings i greek-alphabet #f))) + (assert-string= (build-string strings) + (string-append* strings))) + (let ((strings (make-test-strings i greek-alphabet #t))) + (assert-string= (build-string strings) + (string-append* strings))))))) + +(define legacy-string? + (make-primitive-procedure 'string? 1)) + +(define latin-alphabet + '(#\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l #\m + #\n #\o #\p #\q #\r #\s #\t #\u #\v #\w #\x #\y #\z)) + +(define greek-alphabet + '(#\x3B1 #\x3B2 #\x3B3 #\x3B4 #\x3B5 + #\x3B6 #\x3B7 #\x3B8 #\x3B9 #\x3BA + #\x3BB #\x3BC #\x3BD #\x3BE #\x3BF + #\x3C0 #\x3C1 #\x3C2 #\x3C3 #\x3C4 + #\x3C5 #\x3C6 #\x3C7 #\x3C8 #\x3C9)) + +(define (build-string objects) + (let ((builder (string-builder))) + (for-each builder objects) + (builder))) + +(define (chars->string chars) + (let ((s (make-ustring (length chars)))) + (do ((chars chars (cdr chars)) + (i 0 (fix:+ i 1))) + ((not (pair? chars))) + (string-set! s i (car chars))) + s)) + +(define (make-test-strings n alphabet reverse?) + (let loop ((k 0) (strings '())) + (if (fix:< k n) + (loop (fix:+ k 1) + (cons (chars->string (list-head alphabet k)) + strings)) + (if reverse? + strings + (reverse! strings))))) \ No newline at end of file