Implement string-titlecase.
authorChris Hanson <org/chris-hanson/cph>
Mon, 27 Feb 2017 08:07:26 +0000 (00:07 -0800)
committerChris Hanson <org/chris-hanson/cph>
Mon, 27 Feb 2017 08:07:26 +0000 (00:07 -0800)
src/runtime/char.scm
src/runtime/runtime.pkg
src/runtime/ustring.scm

index 14f72e13391f2b962a559f78c84e20d12c542e7d..ee9a86b56d5103cbc6609bb0ffaea69e4b088a94 100644 (file)
@@ -152,9 +152,6 @@ USA.
 (define char-downcase)
 (define char-foldcase)
 (define char-upcase)
-(define char-downcase-full)
-(define char-foldcase-full)
-(define char-upcase-full)
 (add-boot-init!
  (lambda ()
 
@@ -168,9 +165,6 @@ USA.
    (set! char-downcase (char-mapper ucd-slc-value))
    (set! char-foldcase (char-mapper ucd-scf-value))
    (set! char-upcase (char-mapper ucd-suc-value))
-   (set! char-downcase-full (char-mapper ucd-lc-value))
-   (set! char-foldcase-full (char-mapper ucd-cf-value))
-   (set! char-upcase-full (char-mapper ucd-uc-value))
    unspecific))
 \f
 (define (digit-value char)
index 8bef04f122bc091d8f78915ff7c52ca88a3d26b1..64c2daf381dbb686241ac0a07a3566f63da79d15 100644 (file)
@@ -1275,9 +1275,6 @@ USA.
          unicode-code-point?
          unicode-scalar-value?)
   (export (runtime)
-         char-downcase-full
-         char-foldcase-full
-         char-upcase-full
          char-utf16-byte-length
          char-utf32-byte-length
          char-utf8-byte-length
@@ -1313,6 +1310,7 @@ USA.
         "ucd-table-scf"
         "ucd-table-slc"
         "ucd-table-suc"
+        "ucd-table-tc"
         "ucd-table-uc"
         "ucd-table-upper"
         "ucd-table-wb"
@@ -1329,16 +1327,18 @@ USA.
          char-set:lower-case
          char-set:upper-case
          char-set:whitespace)
+  (export (runtime)
+         (char-downcase-full ucd-lc-value)
+         (char-foldcase-full ucd-cf-value)
+         (char-titlecase-full ucd-tc-value)
+         (char-upcase-full ucd-uc-value))
   (export (runtime character)
-         ucd-cf-value
          ucd-gc-value
-         ucd-lc-value
          ucd-nt-value
          ucd-nv-value
          ucd-scf-value
          ucd-slc-value
-         ucd-suc-value
-         ucd-uc-value)
+         ucd-suc-value)
   (export (runtime ucd-glue)
          char-set:changes-when-case-folded
          ucd-nt-value)
index d740b6b11b7228cb6da849b0becf18c9484a2c42..0bbb33e31d67791e463e1d7495aec393661fa1b1 100644 (file)
@@ -438,22 +438,36 @@ USA.
 (define (string-upcase string)
   (case-transform char-upcase-full string))
 
+(define (case-transform transform string)
+  (let ((builder (string-builder))
+       (end (string-length string)))
+    (do ((index 0 (fix:+ index 1)))
+       ((not (fix:< index end)))
+      (for-each builder (transform (string-ref string index))))
+    (builder)))
+
 (define (string-titlecase string)
-  ;; TODO(cph): implement this
-  (string-copy string))
+  (let ((builder (string-builder)))
+    (find-word-breaks string 0
+                     (lambda (end start)
+                       (maybe-titlecase string start end builder)
+                       end))
+    (builder)))
 
-(define (case-transform transform string)
-  (let ((chars (append-map transform (string->list string))))
-    (let ((n (length chars)))
-      (let ((result
-            (if (every char-8-bit? chars)
-                (legacy-string-allocate n)
-                (full-string-allocate n))))
-       (do ((chars chars (cdr chars))
-            (i 0 (fix:+ i 1)))
-           ((not (pair? chars)))
-         (string-set! result i (car chars)))
-       result))))
+(define (maybe-titlecase string start end builder)
+  (let loop ((index start))
+    (if (fix:< index end)
+       (let ((char (string-ref string index)))
+         (if (char-cased? char)
+             (begin
+               (for-each builder (char-titlecase-full char))
+               (do ((index (fix:+ index 1) (fix:+ index 1)))
+                   ((not (fix:< index end)))
+                 (for-each builder
+                           (char-downcase-full (string-ref string index)))))
+             (begin
+               (builder char)
+               (loop (fix:+ index 1))))))))
 
 (define (string-lower-case? string)
   (let* ((nfd (string->nfd string))
@@ -789,27 +803,28 @@ USA.
            (state:emoji-base
             (standard-state (make-!selector wb-names '(emoji-modifier)))))
 
-       (vector (let ((break? (make-!selector wb-names '(linefeed))))
+       (vector (let ((break?           ;carriage-return
+                      (make-!selector wb-names '(linefeed))))
                  (lambda (wb1 wb2 k0 k1 k2)
                    (declare (ignore wb2 k0 k2))
                    (k1 wb1 (break? wb1))))
-               state:default
-               state:emoji-base
-               state:emoji-base
-               state:default
-               (standard-state
+               state:default           ;double-quote
+               state:emoji-base        ;emoji-base
+               state:emoji-base        ;emoji-base-gaz
+               state:default           ;emoji-modifier
+               (standard-state         ;extend-num-let
                 (make-!selector wb-names
                                 '(extend-num-let hebrew-letter katakana letter
                                                  numeric)))
-               state:default
-               state:default
-               state:default
-               (let ((select:dq (make-selector wb-names '(double-quote)))
-                     (select:hl (make-selector wb-names '(hebrew-letter)))
-                     (break?
+               state:default           ;extend
+               state:default           ;format
+               state:default           ;glue-after-zwj
+               (let ((break?           ;hebrew-letter
                       (make-!selector wb-names
                                       '(extend-num-let hebrew-letter letter
-                                                       numeric single-quote))))
+                                                       numeric single-quote)))
+                     (select:dq (make-selector wb-names '(double-quote)))
+                     (select:hl (make-selector wb-names '(hebrew-letter))))
                  (lambda (wb1 wb2 k0 k1 k2)
                    (cond ((select:extender wb1)
                           (k0 #f))
@@ -823,9 +838,9 @@ USA.
                           (k2 wb2 #f))
                          (else
                           (k1 wb1 (break? wb1))))))
-               (standard-state
+               (standard-state         ;katakana
                 (make-!selector wb-names '(extend-num-let katakana)))
-               (let ((break?
+               (let ((break?           ;letter
                       (make-!selector wb-names
                                       '(extend-num-let hebrew-letter letter
                                                        numeric))))
@@ -838,14 +853,30 @@ USA.
                           (k2 wb2 #f))
                          (else
                           (k1 wb1 (break? wb1))))))
-               state:always-break
-               state:default
-               state:default
-               state:default
-               state:always-break
-               (standard-state
-                (make-!selector wb-names
-                                '(extend-num-let hebrew-letter letter numeric)))
+               state:always-break      ;linefeed
+               state:default           ;mid-num-let
+               state:default           ;mid-letter
+               state:default           ;mid-number
+               state:always-break      ;newline
+               (let ((break?           ;numeric
+                      (make-!selector wb-names
+                                      '(extend-num-let hebrew-letter letter
+                                                       numeric)))
+                     (select:mb/mn/sq
+                      (make-selector wb-names
+                                     '(mid-num-let mid-number single-quote)))
+                     (select:numeric
+                      (make-selector wb-names '(numeric))))
+                 (lambda (wb1 wb2 k0 k1 k2)
+                   (cond ((select:extender wb1)
+                          (k0 #f))
+                         ((and wb2
+                               (select:mb/mn/sq wb1)
+                               (select:numeric wb2))
+                          (k2 wb2 #f))
+                         (else
+                          (k1 wb1 (break? wb1))))))
+               ;; regional-indicator
                (let ((select:regional-indicator
                       (make-selector wb-names '(regional-indicator)))
                      (wb:extend (name->code wb-names 'extend)))
@@ -857,9 +888,11 @@ USA.
                           (k1 wb:extend #f))
                          (else
                           (k1 wb1 #t)))))
-               state:default
-               state:default
-               state:default)))))
+               state:default           ;single-quote
+               state:default           ;other
+               (standard-state         ;zwj
+                (make-!selector wb-names '(emoji-base-gaz glue-after-zwj)))
+               )))))
 \f
 (define (list->string chars)
   (if (every char-8-bit? chars)