From: Chris Hanson Date: Sun, 23 Apr 2017 04:08:26 +0000 (-0700) Subject: Change string-match and string-search to require NFC inputs. X-Git-Tag: mit-scheme-pucked-9.2.12~158^2~8 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=c3d3964189b689ce4ca44b3ffa1dd1d5f8483fcb;p=mit-scheme.git Change string-match and string-search to require NFC inputs. This is because comparison requires that the strings be in the same normalization form, and these procedures return indices into the strings. We can't normalize them after the call, because then the returned indices will refer to strings that are potentially different from the arguments. Since nearly all strings are in NFC by default this should not be a serious drawback. Additionally, the -ci versions of these procedures have been eliminated, basically for the same reason. If the caller needs that functionality they should call string-foldcase themselves. Note that this doesn't affect comparisons that don't return indices. --- diff --git a/src/compiler/base/infnew.scm b/src/compiler/base/infnew.scm index ef9fc11a7..03e29207c 100644 --- a/src/compiler/base/infnew.scm +++ b/src/compiler/base/infnew.scm @@ -365,14 +365,16 @@ USA. (suffix-number y))))))) (define (standard-name? string prefix) - (let ((index (string-match-forward-ci string prefix)) - (end (string-length string))) - (and (= index (string-length prefix)) - (>= (- end index) 2) - (let ((next (string-ref string index))) - (or (char=? #\- next) - (char=? #\_ next))) - (let loop ((index (1+ index))) - (or (= index end) - (and (char-numeric? (string-ref string index)) - (loop (1+ index)))))))) \ No newline at end of file + (let ((string (string-foldcase string)) + (prefix (string-foldcase prefix))) + (let ((index (string-match-forward string prefix)) + (end (string-length string))) + (and (= index (string-length prefix)) + (>= (- end index) 2) + (let ((next (string-ref string index))) + (or (char=? #\- next) + (char=? #\_ next))) + (let loop ((index (1+ index))) + (or (= index end) + (and (char-numeric? (string-ref string index)) + (loop (1+ index))))))))) \ No newline at end of file diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index 27a04059a..eb2fc8777 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -1016,9 +1016,7 @@ USA. string-lower-case? string-map string-match-backward - string-match-backward-ci string-match-forward - string-match-forward-ci string-null? string-pad-left string-pad-right diff --git a/src/runtime/ustring.scm b/src/runtime/ustring.scm index c517e90c2..9691cf22b 100644 --- a/src/runtime/ustring.scm +++ b/src/runtime/ustring.scm @@ -79,6 +79,7 @@ USA. (register-predicate! string? 'string) (register-predicate! mutable-string? 'mutable-string '<= string?) (register-predicate! immutable-string? 'immutable-string '<= string?) + (register-predicate! nfc-string? 'nfc-string '<= string?) (register-predicate! legacy-string? 'legacy-string '<= string? '<= mutable-string?) @@ -701,6 +702,8 @@ USA. ;;;; Match (define (string-match-forward string1 string2) + (guarantee nfc-string? string1 'string-match-forward) + (guarantee nfc-string? string2 'string-match-forward) (let ((end1 (string-length string1)) (end2 (string-length string2))) (let ((end (fix:min end1 end2))) @@ -711,11 +714,9 @@ USA. (loop (fix:+ i 1)) i))))) -(define (string-match-forward-ci string1 string2) - (string-match-forward (string-foldcase string1) - (string-foldcase string2))) - (define (string-match-backward string1 string2) + (guarantee nfc-string? string1 'string-match-backward) + (guarantee nfc-string? string2 'string-match-backward) (let ((s1 (fix:- (string-length string1) 1))) (let loop ((i s1) (j (fix:- (string-length string2) 1))) (if (and (fix:>= i 0) @@ -726,10 +727,6 @@ USA. (fix:- j 1)) (fix:- s1 i))))) -(define (string-match-backward-ci string1 string2) - (string-match-backward (string-foldcase string1) - (string-foldcase string2))) - (define (string-prefix? prefix string #!optional start end) (%string-prefix? (string->nfc prefix) (string->nfc (string-slice string start end)))) @@ -838,6 +835,10 @@ USA. ;;;; Normalization +(define (nfc-string? string) + (and (string? string) + (string-in-nfc? string))) + (define (string-in-nfc? string) (cond ((legacy-string? string) #t) @@ -1485,6 +1486,8 @@ USA. (define-integrable (string-matcher caller matcher) (lambda (pattern text #!optional start end) + (guarantee nfc-string? pattern caller) + (guarantee nfc-string? text caller) (let ((pend (string-length pattern))) (if (fix:= 0 pend) (error:bad-range-argument pend caller))