From e2e8a05bf18435b1814f1bc2f090301eb97281e0 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Fri, 10 Feb 2017 23:52:19 -0800 Subject: [PATCH] Implement char-{down,fold,up}case-full and use in ustring. --- src/runtime/char.scm | 23 +++++++++++++++++++---- src/runtime/runtime.pkg | 17 ++++++++++------- src/runtime/ustring.scm | 21 ++++++++++----------- 3 files changed, 39 insertions(+), 22 deletions(-) diff --git a/src/runtime/char.scm b/src/runtime/char.scm index d24e855e7..62902286b 100644 --- a/src/runtime/char.scm +++ b/src/runtime/char.scm @@ -132,21 +132,36 @@ USA. (guarantee char? char 'char-ci=-predicate) (lambda (char*) (char-ci=? char* char))) - + (define (char-downcase char) - (guarantee unicode-char? char 'char-downcase) + (if (not (unicode-scalar-value? (char-code char))) + (error:not-a unicode-char? char 'char-downcase)) (%make-char (ucd-slc-value (char-code char)) (char-bits char))) (define (char-foldcase char) - (guarantee unicode-char? char 'char-foldcase) + (if (not (unicode-scalar-value? (char-code char))) + (error:not-a unicode-char? char 'char-foldcase)) (%make-char (ucd-scf-value (char-code char)) (char-bits char))) (define (char-upcase char) - (guarantee unicode-char? char 'char-upcase) + (if (not (unicode-scalar-value? (char-code char))) + (error:not-a unicode-char? char 'char-upcase)) (%make-char (ucd-suc-value (char-code char)) (char-bits char))) + +(define (char-downcase-full char) + (guarantee unicode-char? char 'char-downcase-full) + (map integer->char (ucd-lc-value (char->integer char)))) + +(define (char-foldcase-full char) + (guarantee unicode-char? char 'char-foldcase-full) + (map integer->char (ucd-cf-value (char->integer char)))) + +(define (char-upcase-full char) + (guarantee unicode-char? char 'char-upcase-full) + (map integer->char (ucd-uc-value (char->integer char)))) (define-deferred 0-code (char->integer #\0)) ;; Next two codes are offset by 10 to speed up CHAR->DIGIT. diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index e0135d85e..4977f2983 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -1398,7 +1398,11 @@ USA. unicode-code-point-general-category unicode-code-point? unicode-scalar-value->char - unicode-scalar-value?)) + unicode-scalar-value?) + (export (runtime) + char-downcase-full + char-foldcase-full + char-upcase-full)) (define-package (runtime ucd-tables) (files "ucd-table-alpha" @@ -1420,16 +1424,15 @@ USA. char-set:lower-case char-set:upper-case) (export (runtime character) + ucd-cf-value ucd-gc-value + ucd-lc-value ucd-scf-value ucd-slc-value - ucd-suc-value) + ucd-suc-value + ucd-uc-value) (export (runtime ucd-table-glue) - ucd-nt-value) - (export (runtime ustring) - ucd-cf-value - ucd-lc-value - ucd-uc-value)) + ucd-nt-value)) (define-package (runtime ucd-table-glue) (files "ucd-table-glue") diff --git a/src/runtime/ustring.scm b/src/runtime/ustring.scm index 170a51b61..e0b48e7ae 100644 --- a/src/runtime/ustring.scm +++ b/src/runtime/ustring.scm @@ -237,25 +237,24 @@ USA. strings)))))) (define (utf32-string-downcase string) - (utf32-case-transform string ucd-lc-value 'utf32-string-downcase)) + (utf32-case-transform string char-downcase-full 'utf32-string-downcase)) (define (utf32-string-foldcase string) - (utf32-case-transform string ucd-cf-value 'utf32-string-foldcase)) + (utf32-case-transform string char-foldcase-full 'utf32-string-foldcase)) (define (utf32-string-upcase string) - (utf32-case-transform string ucd-uc-value 'utf32-string-upcase)) + (utf32-case-transform string char-upcase-full 'utf32-string-upcase)) -(define (utf32-case-transform string sv-transform caller) - (let ((svs - (append-map (lambda (char) - (sv-transform (char->integer char))) +(define (utf32-case-transform string transform caller) + (let ((chars + (append-map transform (utf32-string->list string)))) - (let ((n (length svs))) + (let ((n (length chars))) (let ((result (make-utf32-string n))) - (do ((svs svs (cdr svs)) + (do ((chars chars (cdr chars)) (i 0 (fix:+ i 1))) - ((not (pair? svs))) - (utf32-string-set! result i (integer->char (car svs)))) + ((not (pair? chars))) + (utf32-string-set! result i (car chars))) result)))) ;; Incorrect, needs title-case implementation -- 2.25.1