equivalently for all purposes. If two such sequences are normalized to
the same form, the resulting normalized sequences will be identical.
-By default, most procedures that return strings return them in
-@acronym{NFC}. Notable exceptions are @code{list->string},
-@code{vector->string}, and the @code{utfX->string} procedures, which
-do no normalization, and of course @code{string->nfd}.
+By default, most procedures that return strings do not normalize, so it
+is up to the programmer to normalize as needed.
Generally speaking, @acronym{NFC} is preferred for most purposes, as
it is the minimal-length sequence for the variants. Consult the
@end itemize
The ``result'' arguments control the form of the returned string. The
-arguments @code{immutable} and @code{mutable} are straightforward,
-specifying the mutability of the returned string. For these
-arguments, the returned string contains exactly the same characters,
-in the same order, as were appended to the builder.
-
-However, calling with the argument @code{nfc}, or with no arguments,
-returns an immutable string in Unicode Normalization Form C, exactly
-as if @code{string->nfc} were called on one of the other two result
-strings.
+arguments @code{immutable} (or no argument) and @code{mutable} are
+straightforward, specifying the mutability of the returned string.
+For these arguments, the returned string contains exactly the same
+characters, in the same order, as were appended to the builder.
+
+However, calling with the argument @code{nfc}, returns an immutable
+string in Unicode Normalization Form C, exactly as if
+@code{string->nfc} were called on one of the other two result strings.
@end deffn
@deffn procedure string-joiner infix prefix suffix
(define (string-tabulate proc n)
(let ((builder (string-builder n)))
(do ((i 0 (fx+ i 1)))
- ((not (fx<? i n)) (builder))
+ ((not (fx<? i n)) (builder 'immutable))
(builder (proc i)))))
;; TODO: move this into string.scm and make it fast.
(loop (successor seed)))
((not (default-object? make-final))
(builder (make-final seed)))))
- (builder))))
+ (builder 'immutable))))
(define string-unfold
(unfolder string-builder))
(builder (string-slice string1 0 start1))
(builder (string-slice string2 start2 end2))
(builder (string-slice string1 end1 len1))
- (builder))))))
+ (builder 'immutable))))))
\f
(define (string-prefix-length string1 string2
#!optional start1 end1 start2 end2)
(do ((index start (fx+ index 1)))
((not (fx<? index end)))
(builder (proc index)))
- (builder))))
+ (builder 'immutable))))
(define (string-for-each-index proc string #!optional start end)
(let* ((end (fix:end-index end (string-length string) 'string-for-each-index))
((not (fx<? index end)))
(if (pred (string-ref string index))
(builder (string-ref string index))))
- (builder))))
+ (builder 'immutable))))
(define (string-remove pred string #!optional start end)
(let* ((end (fix:end-index end (string-length string) 'string-remove))
((not (fx<? index end)))
(if (not (pred (string-ref string index)))
(builder (string-ref string index))))
- (builder))))
+ (builder 'immutable))))
(define (string-repeat kernel n)
(guarantee non-negative-fixnum? n 'string-repeat)
(do ((i 0 (fx+ i 1)))
((not (fx<? i n)))
(builder kernel))
- (builder)))
+ (builder 'immutable)))
(define (xsubstring string #!optional from to start end)
(let* ((end (fix:end-index end (string-length string) 'xsubstring))
(do ((i from (+ i 1)))
((not (< i to)))
(builder (string-ref string (+ start (modulo i n)))))
- (builder))))))
+ (builder 'immutable))))))
\f
(define (string-split string delimiter #!optional grammar limit start end)
(let* ((end (fix:end-index end (string-length string) 'string-split))
(loop index*)))
(if (fix:< index end)
(builder (truncated index))))))
- (builder))))
+ (builder 'immutable))))
(define utf8->string)
(define utf16be->string)
(do ((i start (fix:+ i 1)))
((not (fix:< i end)))
(builder (integer->char (bytevector-u8-ref bytes i))))
- (builder)))
+ (builder 'immutable)))
\f
(define (bytevector->hexadecimal bytes)
(define-integrable (hex-char k)
((not (fix:< i n)))
(builder (hex-char (fix:lsh (bytevector-u8-ref bytes i) -4)))
(builder (hex-char (bytevector-u8-ref bytes i))))
- (builder)))
+ (builder 'immutable)))
(define (hexadecimal->bytevector string)
(guarantee string? string 'hexadecimal->bytevector)
(loop))
(else
(error "Illegal character in HTML form data:" char))))))
- (builder)))
+ (builder 'immutable)))
\f
;;;; Encoder
((not (pair? data)))
(write-char #\&)
(write-datum (car data)))))
- (builder)))
+ (builder 'immutable)))
(define-deferred char-set:unreserved
(char-set-difference char-set:ascii
(cond ((eof-object? byte)
(if (builder 'empty?)
byte
- (builder)))
+ (builder 'immutable)))
((fix:= 13 byte)
(let ((line (builder)))
(if (fix:= 10 (peek-u8 port))
char
(builder)))
((char=? char #\newline)
- (builder))
+ (builder 'immutable))
(else
(builder char)
(loop)))))))))
(builder)))
((char-in-set? char delimiters)
(input-port/unread-char port char)
- (builder))
+ (builder 'immutable))
(else
(builder char)
(loop)))))))))
(string-builder)))
(define (finish-attributes-comment builder db)
- (let ((attributes (and builder (parse-file-attributes-string (builder)))))
+ (let ((attributes
+ (and builder
+ (parse-file-attributes-string (builder 'immutable)))))
(if attributes
(begin
(process-file-attributes attributes db)
(begin
(builder (%read-char db))
(loop))))
- (builder)))
+ (builder 'immutable)))
(define (%atom-end? db)
(let ((char (%peek-char db)))
(integer->char (fix:+ (fix:lsh (fix:+ (fix:lsh d1 3) d2) 3) d3))))
(loop)
- (builder)))
+ (builder 'immutable)))
\f
(define (handler:false db ctx char1 char2)
ctx char1
(%read-char/no-eof db)
char)))
(loop))))
- (name->char (builder)
+ (name->char (builder 'immutable)
(eq? #t (db-fold-case? db))))))))
\f
(define (handler:named-constant db ctx char1 char2)
(begin
(builder char)
(loop)))))
- (builder))))
+ (builder 'immutable))))
\f
;;;; Datum labels
(do ((position start-position (next-position position)))
((same-positions? position end-position))
(builder (next-char position)))
- (builder)))
+ (builder 'immutable)))
(define (make-source-position source)
(let ((marker (list 'source-position)))
(builder range)))
(loop (cdr ranges)))))
(builder #\])
- (builder))
+ (builder 'immutable))
(re-quote-string (string (car chars))))
"")))
\ No newline at end of file
(guarantee regsexp-match? match 'regsexp-replacer)
(let ((builder (string-builder)))
(replacer builder match)
- (builder)))))
+ (builder 'immutable)))))
(define (%regsexp-replacer replacement caller)
(let loop ((r replacement))
(builder (car exprs))
(optimize-alt (cdr exprs) builder))
(if builder
- (cons (builder)
+ (cons (builder 'immutable)
(cons (car exprs)
(optimize-alt (cdr exprs) #f)))
(cons (car exprs)
(optimize-alt (cdr exprs) #f))))
(if builder
- (list (builder))
+ (list (builder 'immutable))
'())))
(define (dispatch)
(cond ((eof-object? byte)
(if (builder 'empty?)
byte
- (builder)))
+ (builder 'immutable)))
((fix:= 13 byte)
(let ((byte (peek-u8 port)))
(cond ((eof-object? byte)
(else
(parse-error port "Invalid line ending:"
'read-ascii-line))))
- (builder))
+ (builder 'immutable))
((fix:= 10 byte)
(parse-error port "Invalid line ending:" 'read-ascii-line))
((and (fix:<= 32 byte) (fix:<= byte 126))
(builder #\\))
(builder char))
string)
- (builder)))
+ (builder 'immutable)))
\f
;;;; Pattern Compiler
(do ((i 0 (fix:+ i 1)))
((not (fix:< i 1)))
(sbuilder (integer->char (get-byte))))
- (write (sbuilder) output)
+ (write (sbuilder 'immutable) output)
(fix:+ 1 n)))
((jump on-failure-jump maybe-finalize-jump dummy-failure-jump)
(write-char #\space output)
(define (string-out/extract-output! port)
(let* ((os (textual-port-state port))
(builder (ostate-builder os))
- (output (builder)))
+ (output (builder 'immutable)))
(builder 'reset!)
(set-ostate-column! os 0)
output))
(vector-for-each append-char! object))
(else
(case object
- ((#!default nfc) (build build-string:nfc))
- ((immutable) (build build-string:immutable))
+ ((#!default immutable) (build build-string:immutable))
+ ((nfc) (build build-string:nfc))
((mutable) (build build-string:mutable))
((legacy) (build build-string:legacy))
((empty? count max-cp reset!) ((builder object)))
if= if< if>))
(define (string-compare-ci string1 string2 if= if< if>)
- (%string-compare (string-foldcase string1)
- (string-foldcase string2)
+ (%string-compare (%foldcase->nfc string1)
+ (%foldcase->nfc string2)
if= if< if>))
;; Non-Unicode implementation, acceptable to R7RS.
((fix:< end2 end1) (if>))
(else (if=))))))))
+(define-integrable (%foldcase->nfc string)
+ (string->nfc (string-foldcase string)))
+
(define-integrable (true) #t)
(define-integrable (false) #f)
(define string>? (string-comparison-maker string->nfc %string>?))
(define string>=? (string-comparison-maker string->nfc %string>=?))
-(define string-ci=? (string-comparison-maker string-foldcase %string=?))
-(define string-ci<? (string-comparison-maker string-foldcase %string<?))
-(define string-ci<=? (string-comparison-maker string-foldcase %string<=?))
-(define string-ci>? (string-comparison-maker string-foldcase %string>?))
-(define string-ci>=? (string-comparison-maker string-foldcase %string>=?))
+(define string-ci=? (string-comparison-maker %foldcase->nfc %string=?))
+(define string-ci<? (string-comparison-maker %foldcase->nfc %string<?))
+(define string-ci<=? (string-comparison-maker %foldcase->nfc %string<=?))
+(define string-ci>? (string-comparison-maker %foldcase->nfc %string>?))
+(define string-ci>=? (string-comparison-maker %foldcase->nfc %string>=?))
\f
;;;; Match
(string->nfc (string-slice string start end))))
(define (string-prefix-ci? prefix string #!optional start end)
- (%string-prefix? (string-foldcase prefix)
- (string-foldcase (string-slice string start end))))
+ (%string-prefix? (%foldcase->nfc prefix)
+ (%foldcase->nfc (string-slice string start end))))
(define (%string-prefix? prefix string)
(let ((n (string-length prefix)))
(string->nfc (string-slice string start end))))
(define (string-suffix-ci? suffix string #!optional start end)
- (%string-suffix? (string-foldcase suffix)
- (string-foldcase (string-slice string start end))))
+ (%string-suffix? (%foldcase->nfc suffix)
+ (%foldcase->nfc (string-slice string start end))))
(define (%string-suffix? suffix string)
(let ((n (string-length suffix))
(do ((index 0 (fix:+ index 1)))
((not (fix:< index end)))
(builder (transform (string-ref string index))))
- (builder)))
+ (builder 'immutable)))
(define (string-titlecase string)
(let ((builder (string-builder)))
end)
0
(string-word-breaks string))
- (builder)))
+ (builder 'immutable)))
(define (maybe-titlecase string start end builder)
(let loop ((index start))
(guarantee string? string caller)
(builder string))
strings)
- (builder)))
+ (builder 'immutable)))
(define (string . objects)
(string* objects))
(lambda (port)
(display object port))))))))
objects)
- (builder)))
+ (builder 'immutable)))
\f
;;;; Mapping
(do ((i 0 (fix:+ i 1)))
((not (fix:< i n)))
(builder (proc i)))
- (builder))))
+ (builder 'immutable))))
\f
(define (string-count proc string . strings)
(receive (n proc) (mapper-values proc string strings)
(builder string))
(cdr strings))
(builder suffix)
- (builder))
+ (builder 'immutable))
"")))))
(define-deferred string-joiner-options
(builder fill-with))
(if (eq? where 'leading)
(builder string))
- (builder))))))))
+ (builder 'immutable))))))))
(define (grapheme-cluster-string? object)
(and (string? object)
(ustring-length string)))))
(define (string-ci-hash string #!optional modulus)
- (string-hash (string-foldcase string) modulus))
+ (string-hash (%foldcase->nfc string) modulus))
(define (8-bit-string? object)
(and (string? object)
(define (string->uninterned-symbol string #!optional start end)
((ucode-primitive system-pair-cons) (ucode-type uninterned-symbol)
- (string->utf8 string start end)
+ (string->utf8
+ (string->nfc
+ (substring string start end)))
(make-unmapped-unbound-reference-trap)))
(define (string->symbol string #!optional start end)
(default-object? start)
(default-object? end))
(->bytes string)
- (string->utf8 string start end))))
+ (string->utf8 (string->nfc (substring string start end))))))
(define (symbol->string symbol)
(let ((bytes (%symbol-bytes symbol 'symbol->string)))
(ustring-ascii? string))
;; Needed during cold load.
(->bytes (ascii-string-foldcase string))
- (string->utf8 (string-foldcase string))))
+ (string->utf8 (string->nfc (string-foldcase string)))))
(define (ustring-ascii? string)
(let ((end (ustring-length string)))
(do ((i 0 (fix:+ i 1)))
((not (fix:< i n)))
(builder (integer->char (bytevector-u8-ref bv i))))
- (builder)))
+ (builder 'immutable)))
(define (set-status-header message code)
(set-header message
(builder (string-slice string start index))
(loop (step-over-eol index)))
(builder (string-slice string start)))))
- (builder))))
+ (builder 'immutable))))
(if (if (default-object? always-copy?) #f always-copy?)
(string-copy string)
string)))
(builder #\space))
(builder (symbol->string nmtoken)))
nmtokens)
- (builder)))
\ No newline at end of file
+ (builder 'immutable)))
\ No newline at end of file
(lambda (expected re string #!optional start end)
(let ((thunk
(lambda ()
- (translate-regexp-match (proc re string start end)))))
+ (translate-regexp-match
+ (proc re string start end)))))
(lambda ()
(with-test-properties
(lambda ()
(match-all-test '(0 5) '(* numeric) "12345")
(match-all-test #f '(w/ascii (* numeric)) "12345")
- (match-all-test '(0 1) 'grapheme "한")
- (match-all-test '(0 1) 'grapheme "글")
+ (match-all-test '(0 1) 'grapheme (string->nfc "한"))
+ (match-all-test '(0 1) 'grapheme (string->nfc "글"))
- (match-all-test '(0 1) '(: bog grapheme eog) "한")
- (match-all-test #f '(: "ᄒ" bog grapheme eog "ᆫ") "한"))
+ (match-all-test '(0 1) '(: bog grapheme eog) (string->nfc "한"))
+ (match-all-test #f '(: "ᄒ" bog grapheme eog "ᆫ") (string->nfc "한")))
(define-test 'chibi-extract
(lambda ()
(define (build-string objects)
(let ((builder (string-builder)))
(for-each builder objects)
- (builder)))
+ (builder 'immutable)))
(define (chars->string chars)
(let ((s (make-string (length chars))))