From: Chris Hanson Date: Sat, 25 Mar 2017 22:19:56 +0000 (-0700) Subject: Initial draft of NFC support; still need to write composition. X-Git-Tag: mit-scheme-pucked-9.2.12~158^2~74 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=6bc884cb05548797342d368d1f1dc29459a99a83;p=mit-scheme.git Initial draft of NFC support; still need to write composition. --- diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index 2e851479c..b03755f8f 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -974,6 +974,7 @@ USA. string string* string->list + string->nfc string->nfd string->vector string-any @@ -1008,6 +1009,8 @@ USA. string-hash string-hash-ci string-head + string-in-nfc? + string-in-nfd? string-joiner string-joiner* string-length @@ -1270,6 +1273,7 @@ USA. "ucd-table-cased" "ucd-table-ccc" "ucd-table-cf" + "ucd-table-comp_ex" "ucd-table-cwcf" "ucd-table-cwl" "ucd-table-cwu" @@ -1277,6 +1281,7 @@ USA. "ucd-table-gcb" "ucd-table-lc" "ucd-table-lower" + "ucd-table-nfc_qc" "ucd-table-nfd_qc" "ucd-table-nt" "ucd-table-nv" @@ -1314,12 +1319,14 @@ USA. char-changes-when-case-folded? char-changes-when-lower-cased? char-changes-when-upper-cased? + char-full-composition-exclusion? char-nfd-quick-check? ucd-ccc-value ucd-cf-value ucd-canonical-dm-value ucd-gcb-value ucd-lc-value + ucd-nfc_qc-value ucd-tc-value ucd-uc-value ucd-wb-value)) diff --git a/src/runtime/ustring.scm b/src/runtime/ustring.scm index 9b55f077f..a3de91069 100644 --- a/src/runtime/ustring.scm +++ b/src/runtime/ustring.scm @@ -513,31 +513,45 @@ USA. ;;;; Normalization (define (string->nfd string) - (if (or (string-ascii? string) ;ASCII unaffected by normalization - (string-in-nfd? string)) + (if (string-in-nfd? string) string (canonical-ordering! (canonical-decomposition string)))) -(define (string-ascii? string) - (let ((n (string-length string))) - (let loop ((i 0)) - (if (fix:< i n) - (and (char-ascii? (string-ref string i)) - (loop (fix:+ i 1))) - #t)))) +(define (string->nfc string) + (if (string-in-nfc? string) + string + (canonical-composition (string->nfd string)))) (define (string-in-nfd? string) - (let ((n (string-length string))) + (let ((end (string-length string))) (let loop ((i 0) (last-ccc 0)) - (if (fix:< i n) - (let* ((char (string-ref string i)) - (ccc (ucd-ccc-value char))) - (and (or (fix:= ccc 0) - (fix:>= ccc last-ccc)) - (char-nfd-quick-check? char) - (loop (fix:+ i 1) ccc))) + (if (fix:< i end) + (let ((char (string-ref string i))) + (if (fix:< (char->integer char) #xC0) + (loop (fix:+ i 1) 0) + (let ((ccc (ucd-ccc-value char))) + (and (or (fix:= ccc 0) + (fix:>= ccc last-ccc)) + (char-nfd-quick-check? char) + (loop (fix:+ i 1) ccc))))) #t)))) +(define (string-in-nfc? string) + (let ((end (string-length string))) + (let loop ((i 0) (last-ccc 0) (result #t)) + (if (fix:< i end) + (let ((char (string-ref string i))) + (if (fix:< (char->integer char) #x300) + (loop (fix:+ i 1) 0 result) + (let ((ccc (ucd-ccc-value char))) + (and (or (fix:= ccc 0) + (fix:>= ccc last-ccc)) + (case (ucd-nfc_qc-value char) + ((yes) (loop (fix:+ i 1) ccc result)) + ((maybe) (loop (fix:+ i 1) ccc 'maybe)) + (else #f)))))) + result)))) + (define (canonical-decomposition string) (let ((end (string-length string)) (builder (string-builder))) @@ -587,6 +601,9 @@ USA. (scan-for-non-starter 0)) string) + +(define (canonical-composition string) + string) ;;;; Grapheme clusters