From 302cdd4be73c5b0465c88e28a96fd63840dfa66e Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Mon, 27 Feb 2017 00:07:26 -0800 Subject: [PATCH] Implement string-titlecase. --- src/runtime/char.scm | 6 --- src/runtime/runtime.pkg | 14 ++--- src/runtime/ustring.scm | 113 ++++++++++++++++++++++++++-------------- 3 files changed, 80 insertions(+), 53 deletions(-) diff --git a/src/runtime/char.scm b/src/runtime/char.scm index 14f72e133..ee9a86b56 100644 --- a/src/runtime/char.scm +++ b/src/runtime/char.scm @@ -152,9 +152,6 @@ USA. (define char-downcase) (define char-foldcase) (define char-upcase) -(define char-downcase-full) -(define char-foldcase-full) -(define char-upcase-full) (add-boot-init! (lambda () @@ -168,9 +165,6 @@ USA. (set! char-downcase (char-mapper ucd-slc-value)) (set! char-foldcase (char-mapper ucd-scf-value)) (set! char-upcase (char-mapper ucd-suc-value)) - (set! char-downcase-full (char-mapper ucd-lc-value)) - (set! char-foldcase-full (char-mapper ucd-cf-value)) - (set! char-upcase-full (char-mapper ucd-uc-value)) unspecific)) (define (digit-value char) diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index 8bef04f12..64c2daf38 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -1275,9 +1275,6 @@ USA. unicode-code-point? unicode-scalar-value?) (export (runtime) - char-downcase-full - char-foldcase-full - char-upcase-full char-utf16-byte-length char-utf32-byte-length char-utf8-byte-length @@ -1313,6 +1310,7 @@ USA. "ucd-table-scf" "ucd-table-slc" "ucd-table-suc" + "ucd-table-tc" "ucd-table-uc" "ucd-table-upper" "ucd-table-wb" @@ -1329,16 +1327,18 @@ USA. char-set:lower-case char-set:upper-case char-set:whitespace) + (export (runtime) + (char-downcase-full ucd-lc-value) + (char-foldcase-full ucd-cf-value) + (char-titlecase-full ucd-tc-value) + (char-upcase-full ucd-uc-value)) (export (runtime character) - ucd-cf-value ucd-gc-value - ucd-lc-value ucd-nt-value ucd-nv-value ucd-scf-value ucd-slc-value - ucd-suc-value - ucd-uc-value) + ucd-suc-value) (export (runtime ucd-glue) char-set:changes-when-case-folded ucd-nt-value) diff --git a/src/runtime/ustring.scm b/src/runtime/ustring.scm index d740b6b11..0bbb33e31 100644 --- a/src/runtime/ustring.scm +++ b/src/runtime/ustring.scm @@ -438,22 +438,36 @@ USA. (define (string-upcase string) (case-transform char-upcase-full string)) +(define (case-transform transform string) + (let ((builder (string-builder)) + (end (string-length string))) + (do ((index 0 (fix:+ index 1))) + ((not (fix:< index end))) + (for-each builder (transform (string-ref string index)))) + (builder))) + (define (string-titlecase string) - ;; TODO(cph): implement this - (string-copy string)) + (let ((builder (string-builder))) + (find-word-breaks string 0 + (lambda (end start) + (maybe-titlecase string start end builder) + end)) + (builder))) -(define (case-transform transform string) - (let ((chars (append-map transform (string->list string)))) - (let ((n (length chars))) - (let ((result - (if (every char-8-bit? chars) - (legacy-string-allocate n) - (full-string-allocate n)))) - (do ((chars chars (cdr chars)) - (i 0 (fix:+ i 1))) - ((not (pair? chars))) - (string-set! result i (car chars))) - result)))) +(define (maybe-titlecase string start end builder) + (let loop ((index start)) + (if (fix:< index end) + (let ((char (string-ref string index))) + (if (char-cased? char) + (begin + (for-each builder (char-titlecase-full char)) + (do ((index (fix:+ index 1) (fix:+ index 1))) + ((not (fix:< index end))) + (for-each builder + (char-downcase-full (string-ref string index))))) + (begin + (builder char) + (loop (fix:+ index 1)))))))) (define (string-lower-case? string) (let* ((nfd (string->nfd string)) @@ -789,27 +803,28 @@ USA. (state:emoji-base (standard-state (make-!selector wb-names '(emoji-modifier))))) - (vector (let ((break? (make-!selector wb-names '(linefeed)))) + (vector (let ((break? ;carriage-return + (make-!selector wb-names '(linefeed)))) (lambda (wb1 wb2 k0 k1 k2) (declare (ignore wb2 k0 k2)) (k1 wb1 (break? wb1)))) - state:default - state:emoji-base - state:emoji-base - state:default - (standard-state + state:default ;double-quote + state:emoji-base ;emoji-base + state:emoji-base ;emoji-base-gaz + state:default ;emoji-modifier + (standard-state ;extend-num-let (make-!selector wb-names '(extend-num-let hebrew-letter katakana letter numeric))) - state:default - state:default - state:default - (let ((select:dq (make-selector wb-names '(double-quote))) - (select:hl (make-selector wb-names '(hebrew-letter))) - (break? + state:default ;extend + state:default ;format + state:default ;glue-after-zwj + (let ((break? ;hebrew-letter (make-!selector wb-names '(extend-num-let hebrew-letter letter - numeric single-quote)))) + numeric single-quote))) + (select:dq (make-selector wb-names '(double-quote))) + (select:hl (make-selector wb-names '(hebrew-letter)))) (lambda (wb1 wb2 k0 k1 k2) (cond ((select:extender wb1) (k0 #f)) @@ -823,9 +838,9 @@ USA. (k2 wb2 #f)) (else (k1 wb1 (break? wb1)))))) - (standard-state + (standard-state ;katakana (make-!selector wb-names '(extend-num-let katakana))) - (let ((break? + (let ((break? ;letter (make-!selector wb-names '(extend-num-let hebrew-letter letter numeric)))) @@ -838,14 +853,30 @@ USA. (k2 wb2 #f)) (else (k1 wb1 (break? wb1)))))) - state:always-break - state:default - state:default - state:default - state:always-break - (standard-state - (make-!selector wb-names - '(extend-num-let hebrew-letter letter numeric))) + state:always-break ;linefeed + state:default ;mid-num-let + state:default ;mid-letter + state:default ;mid-number + state:always-break ;newline + (let ((break? ;numeric + (make-!selector wb-names + '(extend-num-let hebrew-letter letter + numeric))) + (select:mb/mn/sq + (make-selector wb-names + '(mid-num-let mid-number single-quote))) + (select:numeric + (make-selector wb-names '(numeric)))) + (lambda (wb1 wb2 k0 k1 k2) + (cond ((select:extender wb1) + (k0 #f)) + ((and wb2 + (select:mb/mn/sq wb1) + (select:numeric wb2)) + (k2 wb2 #f)) + (else + (k1 wb1 (break? wb1)))))) + ;; regional-indicator (let ((select:regional-indicator (make-selector wb-names '(regional-indicator))) (wb:extend (name->code wb-names 'extend))) @@ -857,9 +888,11 @@ USA. (k1 wb:extend #f)) (else (k1 wb1 #t))))) - state:default - state:default - state:default))))) + state:default ;single-quote + state:default ;other + (standard-state ;zwj + (make-!selector wb-names '(emoji-base-gaz glue-after-zwj))) + ))))) (define (list->string chars) (if (every char-8-bit? chars) -- 2.25.1