From 23045820895cb3087e1f765c7574e754d9c9816f Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Fri, 10 Feb 2017 20:40:46 -0800 Subject: [PATCH] Implement char-foldcase and ustring-foldcase. Also fix implementations of ustring-{up,down}case. --- src/runtime/char.scm | 16 +++++++-------- src/runtime/runtime.pkg | 14 +++++++++++-- src/runtime/ustring.scm | 45 +++++++++++++++++++++++++++++------------ 3 files changed, 51 insertions(+), 24 deletions(-) diff --git a/src/runtime/char.scm b/src/runtime/char.scm index 5bf608cdc..d413d9e7c 100644 --- a/src/runtime/char.scm +++ b/src/runtime/char.scm @@ -118,7 +118,7 @@ USA. (fix:>= (char-ci->integer x) (char-ci->integer y))) (define-integrable (char-ci->integer char) - (char->integer (char-upcase char))) + (ucd-scf-value (char->integer char))) (define (char=-predicate char) (guarantee char? char 'char=-predicate) @@ -132,17 +132,15 @@ USA. (define (char-downcase char) (guarantee unicode-char? char 'char-downcase) - (let ((cp (ucd-slc-value (char->integer char)))) - (if (index-fixnum? cp) - (integer->char cp) - char))) + (integer->char (ucd-slc-value (char->integer char)))) + +(define (char-foldcase char) + (guarantee unicode-char? char 'char-foldcase) + (integer->char (ucd-scf-value (char->integer char)))) (define (char-upcase char) (guarantee unicode-char? char 'char-upcase) - (let ((cp (ucd-suc-value (char->integer char)))) - (if (index-fixnum? cp) - (integer->char cp) - char))) + (integer->char (ucd-suc-value (char->integer char)))) (define-deferred 0-code (char->integer #\0)) ;; Next two codes are offset by 10 to speed up CHAR->DIGIT. diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index e1020f50c..16a20778c 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -1177,7 +1177,7 @@ USA. ustring-find-last-char ;prefer ustring-find-last-index ustring-find-last-char-in-set ;prefer ustring-find-last-index ustring-find-last-index - ;; ustring-foldcase + ustring-foldcase ustring-for-each ustring-hash ustring-head @@ -1357,6 +1357,7 @@ USA. char-code char-code-limit char-downcase + char-foldcase char-general-category char-integer-limit char-upcase @@ -1400,11 +1401,15 @@ USA. (define-package (runtime ucd-tables) (files "ucd-table-alpha" + "ucd-table-cf" "ucd-table-gc" + "ucd-table-lc" "ucd-table-lower" "ucd-table-nt" + "ucd-table-scf" "ucd-table-slc" "ucd-table-suc" + "ucd-table-uc" "ucd-table-upper" "ucd-table-wspace") (parent (runtime)) @@ -1415,10 +1420,15 @@ USA. char-set:upper-case) (export (runtime character) ucd-gc-value + ucd-scf-value ucd-slc-value ucd-suc-value) (export (runtime ucd-table-glue) - ucd-nt-value)) + ucd-nt-value) + (export (runtime ustring) + ucd-cf-value + ucd-lc-value + ucd-uc-value)) (define-package (runtime ucd-table-glue) (files "ucd-table-glue") diff --git a/src/runtime/ustring.scm b/src/runtime/ustring.scm index b2c97ecd0..24a6306df 100644 --- a/src/runtime/ustring.scm +++ b/src/runtime/ustring.scm @@ -236,15 +236,29 @@ USA. (utf32-string-ref string i)) strings)))))) -;; Incorrect implementation -(define (utf32-string-upcase string) - (utf32-string-map char-upcase string)) - -;; Incorrect implementation (define (utf32-string-downcase string) - (utf32-string-map char-downcase string)) + (utf32-case-transform string ucd-lc-value 'utf32-string-downcase)) -;; Random and probably incorrect. +(define (utf32-string-foldcase string) + (utf32-case-transform string ucd-cf-value 'utf32-string-foldcase)) + +(define (utf32-string-upcase string) + (utf32-case-transform string ucd-uc-value 'utf32-string-upcase)) + +(define (utf32-case-transform string sv-transform caller) + (let ((svs + (append-map (lambda (char) + (sv-transform (char->integer char))) + (utf32-string->list string)))) + (let ((n (length svs))) + (let ((result (make-utf32-string n))) + (do ((svs svs (cdr svs)) + (i 0 (fix:+ i 1))) + ((not (pair? svs))) + (utf32-string-set! result i (integer->char (car svs)))) + result)))) + +;; Incorrect, needs title-case implementation (define (utf32-string-capitalize string) (let ((index (utf32-string-find-first-index char-alphabetic? string)) (string (utf32-string-copy string))) @@ -414,7 +428,7 @@ USA. (and (fix:= (ustring-length string1) (ustring-length string2)) (ustring-every char-ci=? string1 string2))) -;; Incorrect implementation. +;; Non-Unicode implementation, acceptable to R7RS. (define-integrable (%string-comparison-maker c= c<) (lambda (string1 string2) (let ((end1 (ustring-length string1)) @@ -649,16 +663,21 @@ USA. (define (ustring-find-last-char-in-set string char-set #!optional start end) (ustring-find-last-index (char-set-predicate char-set) string start end)) -(define (ustring-upcase string) - (cond ((legacy-string? string) (legacy-string-upcase string)) - ((utf32-string? string) (utf32-string-upcase string)) - (else (error:not-a ustring? string 'ustring-upcase)))) - (define (ustring-downcase string) (cond ((legacy-string? string) (legacy-string-downcase string)) ((utf32-string? string) (utf32-string-downcase string)) (else (error:not-a ustring? string 'ustring-downcase)))) +(define (ustring-foldcase string) + (cond ((legacy-string? string) (legacy-string-downcase string)) + ((utf32-string? string) (utf32-string-foldcase string)) + (else (error:not-a ustring? string 'ustring-foldcase)))) + +(define (ustring-upcase string) + (cond ((legacy-string? string) (legacy-string-upcase string)) + ((utf32-string? string) (utf32-string-upcase string)) + (else (error:not-a ustring? string 'ustring-upcase)))) + (define (ustring-capitalize string) (cond ((legacy-string? string) (legacy-string-capitalize string)) ((utf32-string? string) (utf32-string-capitalize string)) -- 2.25.1