From: Chris Hanson Date: Sun, 19 Feb 2017 01:42:09 +0000 (-0800) Subject: Implement ustring-{lower,upper}-case?. X-Git-Tag: mit-scheme-pucked-9.2.12~220^2~72 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=15be5b23e2e186c0e925aa51597434c85794a51d;p=mit-scheme.git Implement ustring-{lower,upper}-case?. --- diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index 6acd8ce7f..3e31bfa17 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -1092,7 +1092,7 @@ USA. string-joiner string-joiner* ;; string-length - string-lower-case? + ;; string-lower-case? ;; string-map string-match-backward string-match-backward-ci @@ -1121,7 +1121,7 @@ USA. string-trim-right ;; string-upcase string-upcase! - string-upper-case? + ;; string-upper-case? ;; string<=? ;; stringnfd string)) + (end (ustring-length nfd))) + (let loop ((i 0)) + (if (fix:< i end) + (and (not (char-changes-when-lower-cased? (ustring-ref nfd i))) + (loop (fix:+ i 1))) + #t)))) + +(define (ustring-upper-case? string) + (let* ((nfd (ustring->nfd string)) + (end (ustring-length nfd))) + (let loop ((i 0)) + (if (fix:< i end) + (and (not (char-changes-when-upper-cased? (ustring-ref nfd i))) + (loop (fix:+ i 1))) + #t)))) + +(define (ustring->nfd string) + (if (ustring-in-nfd? string) + string + (canonical-ordering! (canonical-decomposition string)))) + +(define (ustring-in-nfd? string) + (let ((n (ustring-length string))) + (let loop ((i 0) (last-ccc 0)) + (if (fix:< i n) + (let* ((char (ustring-ref string i)) + (ccc (ucd-ccc-value char))) + (and (or (fix:= ccc 0) + (fix:>= ccc last-ccc)) + (char-nfd-quick-check? char) + (loop (fix:+ i 1) ccc))) + #t)))) + +(define (canonical-decomposition string) + (let ((end (ustring-length string))) + (let ((result + (make-ustring + (do ((i 0 (fix:+ i 1)) + (j 0 (fix:+ j (length (ucd-dm-value (ustring-ref string i)))))) + ((not (fix:< i end)) j))))) + (let loop ((i 0) (j 0)) + (if (fix:< i end) + (loop (fix:+ i 1) + (do ((chars (ucd-dm-value (ustring-ref string i)) + (cdr chars)) + (j j (fix:+ j 1))) + ((not (pair? chars)) j) + (ustring-set! result j (car chars)))))) + result))) + +(define (canonical-ordering! string) + (let ((end (ustring-length string))) + + (define (scan-for-non-starter i) + (if (fix:< i end) + (let* ((char (ustring-ref string i)) + (ccc (ucd-ccc-value char))) + (if (fix:= 0 ccc) + (scan-for-non-starter (fix:+ i 1)) + (maybe-twiddle char ccc i))))) + + (define (maybe-twiddle char1 ccc1 i1) + (let ((i2 (fix:+ i1 1))) + (if (fix:< i2 end) + (let* ((char2 (ustring-ref string i2)) + (ccc2 (ucd-ccc-value char2))) + (cond ((fix:= 0 ccc2) + (scan-for-non-starter (fix:+ i2 1))) + ((fix:<= ccc1 ccc2) + (maybe-twiddle char2 ccc2 i2)) + (else + (ustring-set! string i1 char2) + (ustring-set! string i2 char1) + (maybe-twiddle char1 ccc1 i2))))))) + + (scan-for-non-starter 0)) + string) + +#| +(define (quick-check string qc-value) + (let ((n (ustring-length string))) + (let loop ((i 0) (last-ccc 0) (result #t)) + (if (fix:< i n) + (let* ((char (ustring-ref string i)) + (ccc (ucd-ccc-value char))) + (if (and (fix:> ccc 0) + (fix:< ccc last-ccc)) + #f + (let ((check (qc-value char))) + (and check + (if (eq? check 'maybe) + (loop (fix:+ i 1) ccc check) + (loop (fix:+ i 1) ccc result)))))) + result)))) +|# (define (list->ustring chars) (if (every char-8-bit? chars) @@ -669,82 +767,4 @@ USA. (if (fix:< i end) (and (proc (ref string i)) (loop (fix:+ i 1))) - #t))) - -(define (ustring->nfd string) - (if (ustring-in-nfd? string) - string - (canonical-ordering! (canonical-decomposition string)))) - -(define (ustring-in-nfd? string) - (let ((n (ustring-length string))) - (let loop ((i 0) (last-ccc 0)) - (if (fix:< i n) - (let* ((char (ustring-ref string i)) - (ccc (ucd-ccc-value char))) - (and (or (fix:= ccc 0) - (fix:>= ccc last-ccc)) - (char-nfd-quick-check? char) - (loop (fix:+ i 1) ccc))) - #t)))) - -(define (canonical-decomposition string) - (let ((end (ustring-length string))) - (let ((result - (make-ustring - (do ((i 0 (fix:+ i 1)) - (j 0 (fix:+ j (length (ucd-dm-value (ustring-ref string i)))))) - ((not (fix:< i end)) j))))) - (let loop ((i 0) (j 0)) - (if (fix:< i end) - (loop (fix:+ i 1) - (do ((chars (ucd-dm-value (ustring-ref string i)) - (cdr chars)) - (j j (fix:+ j 1))) - ((not (pair? chars)) j) - (ustring-set! result j (car chars)))))) - result))) - -(define (canonical-ordering! string) - (let ((end (ustring-length string))) - - (define (scan-for-non-starter i) - (if (fix:< i end) - (let* ((char (ustring-ref string i)) - (ccc (ucd-ccc-value char))) - (if (fix:= 0 ccc) - (scan-for-non-starter (fix:+ i 1)) - (maybe-twiddle char ccc i))))) - - (define (maybe-twiddle char1 ccc1 i1) - (let ((i2 (fix:+ i1 1))) - (if (fix:< i2 end) - (let* ((char2 (ustring-ref string i2)) - (ccc2 (ucd-ccc-value char2))) - (cond ((fix:= 0 ccc2) - (scan-for-non-starter (fix:+ i2 1))) - ((fix:<= ccc1 ccc2) - (maybe-twiddle char2 ccc2 i2)) - (else - (ustring-set! string i1 char2) - (ustring-set! string i2 char1) - (maybe-twiddle char1 ccc1 i2))))))) - - (scan-for-non-starter 0)) - string) - -(define (quick-check string qc-value) - (let ((n (ustring-length string))) - (let loop ((i 0) (last-ccc 0) (result #t)) - (if (fix:< i n) - (let* ((char (ustring-ref string i)) - (ccc (ucd-ccc-value char))) - (if (and (fix:> ccc 0) - (fix:< ccc last-ccc)) - #f - (let ((check (qc-value char))) - (and check - (if (eq? check 'maybe) - (loop (fix:+ i 1) ccc check) - (loop (fix:+ i 1) ccc result)))))) - result)))) \ No newline at end of file + #t))) \ No newline at end of file