From: Chris Hanson Date: Sun, 26 Mar 2017 23:12:04 +0000 (-0700) Subject: Change string-builder to normalize to NFC by default. X-Git-Tag: mit-scheme-pucked-9.2.12~158^2~71 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=d98f4d17ca26665bdc11af16863d1066777656dd;p=mit-scheme.git Change string-builder to normalize to NFC by default. --- diff --git a/src/runtime/ustring.scm b/src/runtime/ustring.scm index 56f778a8a..5618fc982 100644 --- a/src/runtime/ustring.scm +++ b/src/runtime/ustring.scm @@ -214,20 +214,29 @@ USA. ;;;; Streaming build -(define (string-builder) - (let ((builder - (make-sequence-builder full-string-allocate - string-length - string-ref - string-set! - 16 - string-builder:finish-build))) - (lambda (#!optional object) - (cond ((default-object? object) ((builder 'build))) - ((bitless-char? object) ((builder 'append-element!) object)) - ((string? object) ((builder 'append-sequence!) object)) - ((memq object '(empty? count reset!)) ((builder object))) - (else (error "Not a char or string:" object)))))) +(define (string-builder . options) + (receive (buffer-length ->nfc?) + (string-builder-options options 'string-builder) + (let ((builder + (make-sequence-builder full-string-allocate + string-length + string-ref + string-set! + buffer-length + (if ->nfc? + string-builder:finish-build-nfc + string-builder:finish-build)))) + (lambda (#!optional object) + (cond ((default-object? object) ((builder 'build))) + ((bitless-char? object) ((builder 'append-element!) object)) + ((string? object) ((builder 'append-sequence!) object)) + ((memq object '(empty? count reset!)) ((builder object))) + (else (error "Not a char or string:" object))))))) + +(define-deferred string-builder-options + (keyword-option-parser + (list (list 'buffer-length positive-fixnum? 16) + (list '->nfc? boolean? #t)))) (define (string-builder:finish-build parts) (let ((result @@ -246,6 +255,9 @@ USA. ((not (pair? parts))) (string-copy! result i (caar parts) 0 (cdar parts))) result)) + +(define (string-builder:finish-build-nfc parts) + (string->nfc (string-builder:finish-build parts))) ;;;; Copy @@ -555,7 +567,7 @@ USA. (define (canonical-decomposition string) (let ((end (string-length string)) - (builder (string-builder))) + (builder (string-builder '->nfc? #f))) (do ((i 0 (fix:+ i 1))) ((not (fix:< i end))) (let loop ((char (string-ref string i))) @@ -605,7 +617,7 @@ USA. (define (canonical-composition string) (let ((end (string-length string)) - (builder (string-builder)) + (builder (string-builder '->nfc? #f)) (sk ucd-canonical-cm-second-keys) (sv ucd-canonical-cm-second-values)) diff --git a/tests/runtime/test-string.scm b/tests/runtime/test-string.scm index 3e12df3ef..d69e69459 100644 --- a/tests/runtime/test-string.scm +++ b/tests/runtime/test-string.scm @@ -175,7 +175,7 @@ USA. 'expression string))) (define (convert-break-test-case test-case) - (let ((builder (string-builder))) + (let ((builder (string-builder '->nfc? #f))) (let loop ((test-case test-case) (index 0) (breaks '())) (let ((breaks (if (car test-case)