From d4f91e23a33d5c76e91306eb4bbfb0192c622e41 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Sun, 19 Mar 2017 01:03:54 -0700 Subject: [PATCH] Implement decomposition-type table and use it for correct NFD conversion. --- src/etc/ucd-converter.scm | 57 +++++++++++++++------------ src/etc/ucd-raw-props/names.scm | 22 +++++++++-- src/runtime/runtime.pkg | 2 + src/runtime/ucd-table-dt.scm | 69 +++++++++++++++++++++++++++++++++ src/runtime/ustring.scm | 22 +++-------- 5 files changed, 128 insertions(+), 44 deletions(-) create mode 100644 src/runtime/ucd-table-dt.scm diff --git a/src/etc/ucd-converter.scm b/src/etc/ucd-converter.scm index 7cb477f30..26408d2bf 100644 --- a/src/etc/ucd-converter.scm +++ b/src/etc/ucd-converter.scm @@ -407,6 +407,7 @@ USA. "ccc" "cf" "dm" + "dt" "gc" "lc" "nt" @@ -420,17 +421,17 @@ USA. (define (generate-property-table prop-name) (let ((exprs (generate-property-table-code prop-name)) (ucd-version (read-ucd-version-file))) - (parameterize ((param:pp-forced-x-size 1000)) + (parameterize ((param:pp-forced-x-size 1000) + (param:unparse-char-in-unicode-syntax? #t)) (call-with-output-file (prop-table-file-name prop-name) (lambda (port) (write-copyright-and-title prop-name ucd-version port) (write-code-header port) - (parameterize ((param:unparse-char-in-unicode-syntax? #t)) - (print-code-expr (car exprs) port) - (for-each (lambda (expr) - (newline port) - (print-code-expr expr port)) - (cdr exprs)))))))) + (print-code-expr (car exprs) port) + (for-each (lambda (expr) + (newline port) + (print-code-expr expr port)) + (cdr exprs))))))) (define (prop-table-file-name prop-name) (string-append (->namestring output-file-root) @@ -465,10 +466,9 @@ USA. (define (metadata->code-generator metadata) (let ((name (metadata-name metadata)) (type-spec (metadata-type-spec metadata))) - (cond ((string=? name "GCB") code-generator:gcb) - ((string=? name "NFC_QC") code-generator:nfc-qc) + (cond ((string=? name "NFC_QC") code-generator:nfc-qc) ((string=? name "NFKC_QC") code-generator:nfc-qc) - ((string=? name "WB") code-generator:wb) + ((string=? name "dt") code-generator:dt) ((string=? name "gc") code-generator:gc) ((string=? name "nt") code-generator:nt) ((eq? type-spec 'boolean) code-generator:boolean) @@ -477,6 +477,7 @@ USA. ((eq? type-spec 'code-point*) code-generator:code-point*) ((eq? type-spec 'code-point+) code-generator:code-point+) ((eq? type-spec 'rational-or-nan) code-generator:rational-or-nan) + ((unmapped-enum-type? type-spec) code-generator:unmapped-enum) (else (error "Unsupported metadata:" metadata))))) (define (code-generator:boolean prop-name metadata prop-alist proc-name) @@ -508,28 +509,28 @@ USA. ((trie-code-generator value-manager:code-points) prop-name metadata prop-alist proc-name)) -(define (code-generator:gc prop-name metadata prop-alist proc-name) - ((trie-code-generator (mapped-enum-value-manager #f metadata)) +(define (code-generator:rational-or-nan prop-name metadata prop-alist proc-name) + ((trie-code-generator value-manager:rational-or-nan) prop-name metadata prop-alist proc-name)) -(define (code-generator:gcb prop-name metadata prop-alist proc-name) +(define (code-generator:unmapped-enum prop-name metadata prop-alist proc-name) ((trie-code-generator (unmapped-enum-value-manager #f metadata)) prop-name metadata prop-alist proc-name)) -(define (code-generator:nfc-qc prop-name metadata prop-alist proc-name) - ((trie-code-generator (mapped-enum-value-manager "Y" metadata)) +(define (code-generator:dt prop-name metadata prop-alist proc-name) + ((trie-code-generator (mapped-enum-value-manager "none" metadata)) prop-name metadata prop-alist proc-name)) -(define (code-generator:nt prop-name metadata prop-alist proc-name) - ((trie-code-generator (mapped-enum-value-manager "None" metadata)) +(define (code-generator:gc prop-name metadata prop-alist proc-name) + ((trie-code-generator (mapped-enum-value-manager #f metadata)) prop-name metadata prop-alist proc-name)) -(define (code-generator:rational-or-nan prop-name metadata prop-alist proc-name) - ((trie-code-generator value-manager:rational-or-nan) +(define (code-generator:nfc-qc prop-name metadata prop-alist proc-name) + ((trie-code-generator (mapped-enum-value-manager "Y" metadata)) prop-name metadata prop-alist proc-name)) -(define (code-generator:wb prop-name metadata prop-alist proc-name) - ((trie-code-generator (unmapped-enum-value-manager #f metadata)) +(define (code-generator:nt prop-name metadata prop-alist proc-name) + ((trie-code-generator (mapped-enum-value-manager "None" metadata)) prop-name metadata prop-alist proc-name)) (define (value-manager default-string converter @@ -537,17 +538,25 @@ USA. (make-value-manager default-string converter (if (default-object? runtime-default) - (let ((value + (let ((value-expr (and default-string - (converter default-string)))) + (maybe-quote + (converter default-string))))) (lambda (char-expr) char-expr - value)) + value-expr)) runtime-default) (if (default-object? runtime-converter) (lambda (sv-expr) sv-expr) runtime-converter))) +(define (maybe-quote object) + (if (or (symbol? object) + (pair? object) + (null? object)) + `',object + object)) + (define-record-type (make-value-manager default-string converter diff --git a/src/etc/ucd-raw-props/names.scm b/src/etc/ucd-raw-props/names.scm index 914582e8f..a1e2e3736 100644 --- a/src/etc/ucd-raw-props/names.scm +++ b/src/etc/ucd-raw-props/names.scm @@ -464,8 +464,24 @@ USA. ("cf" case-folding code-point+) ("dm" decomposition-mapping code-point*) ("dt" decomposition-type - (enum "can" "com" "enc" "fin" "font" "fra" "init" "iso" "med" - "nar" "nb" "sml" "sqr" "sub" "sup" "vert" "wide" "none")) + (enum ("can" . canonical) + ("com" . unspecified) + ("enc" . encircled) + ("fin" . final-presentation) + ("font" . font-variant) + ("fra" . vulgar-fraction) + ("init" . initial-presentation) + ("iso" . isolated-presentation) + ("med" . medial-presentation) + ("nar" . narrow-compatibility) + ("nb" . no-break-version) + ("sml" . small-variant) + ("sqr" . cjk-squared-font-variant) + ("sub" . subscript) + ("sup" . superscript) + ("vert" . vertical-layout-presentation) + ("wide" . wide-compatibility) + ("none" . #f))) ("ea" east-asian-width (enum "A" "F" "H" "N" "Na" "W")) ("gc" general-category (enum ("Lu" . letter:uppercase) @@ -504,7 +520,7 @@ USA. ("LVT" . lvt-syllable) ("T" . trailing-jamo) ("V" . vowel-jamo) - ("NA" . not-applicable))) + ("NA" . #f))) ("isc" iso-10646-comment string) ("jg" joining-group (enum "African_Feh" "African_Noon" "African_Qaf" diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index 66e6b317b..c9bc797fe 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -1273,6 +1273,7 @@ USA. "ucd-table-cwl" "ucd-table-cwu" "ucd-table-dm" + "ucd-table-dt" "ucd-table-gc" "ucd-table-gcb" "ucd-table-lc" @@ -1318,6 +1319,7 @@ USA. ucd-ccc-value ucd-cf-value ucd-dm-value + ucd-dt-value ucd-gcb-value ucd-lc-value ucd-tc-value diff --git a/src/runtime/ucd-table-dt.scm b/src/runtime/ucd-table-dt.scm new file mode 100644 index 000000000..ac7d33606 --- /dev/null +++ b/src/runtime/ucd-table-dt.scm @@ -0,0 +1,69 @@ +#| -*-Scheme-*- + +Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, + 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, + 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, + 2017 Massachusetts Institute of Technology + +This file is part of MIT/GNU Scheme. + +MIT/GNU Scheme is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at +your option) any later version. + +MIT/GNU Scheme is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with MIT/GNU Scheme; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, +USA. + +|# + +;;;; UCD property: dt (decomposition-type) + +;;; Generated from Unicode 9.0.0 + +(declare (usual-integrations)) + +(define (ucd-dt-value char) + (let ((sv (char->integer char))) + (vector-ref ucd-dt-table-5 (bytevector-u8-ref ucd-dt-table-4 (fix:or (fix:lsh (bytevector-u8-ref ucd-dt-table-3 (fix:or (fix:lsh (bytevector-u8-ref ucd-dt-table-2 (fix:or (fix:lsh (bytevector-u8-ref ucd-dt-table-1 (fix:or (fix:lsh (bytevector-u8-ref ucd-dt-table-0 (fix:lsh sv -16)) 4) (fix:and 15 (fix:lsh sv -12)))) 4) (fix:and 15 (fix:lsh sv -8)))) 4) (fix:and 15 (fix:lsh sv -4)))) 4) (fix:and 15 sv)))))) + +(define-deferred ucd-dt-table-0 + (vector->bytevector '#(0 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3))) + +(define-deferred ucd-dt-table-1 + (vector->bytevector '#(0 1 2 3 4 4 4 4 4 4 5 6 6 7 4 8 4 9 4 4 4 4 4 4 4 4 4 4 4 10 11 12 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 13 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4))) + +(define-deferred ucd-dt-table-2 + (vector->bytevector '#(0 1 2 3 4 5 6 7 7 8 9 10 11 12 13 14 15 7 7 7 7 7 7 7 7 7 7 16 7 17 18 19 20 21 22 23 24 7 7 7 7 7 25 7 26 27 28 29 30 31 32 33 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 34 35 7 7 7 36 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 38 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 37 39 40 41 42 43 44 45 46 7 47 48 49 7 7 7 7 7 7 7 7 7 7 7 50 7 7 51 52 53 54 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 55 7 7 56 57 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 37 37 58 7 7 7 7 7))) + +(define-deferred ucd-dt-table-3 + (vector->bytevector + '#(0 0 0 0 0 0 0 0 0 0 1 2 3 4 3 5 6 7 8 9 10 11 8 12 0 0 13 14 15 16 17 18 6 19 20 21 0 0 0 0 0 0 0 22 0 23 24 0 0 0 0 0 25 0 0 26 27 14 28 14 29 30 0 31 32 33 0 33 0 32 0 34 0 0 0 0 35 36 37 38 0 0 0 0 0 0 0 0 39 0 0 0 0 0 0 0 0 0 40 0 0 0 0 41 0 0 0 0 42 43 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 33 44 0 45 0 0 0 0 0 0 46 47 0 0 0 0 0 48 0 49 0 0 0 0 0 0 0 0 0 0 0 0 0 0 50 51 0 0 0 52 0 0 53 0 0 0 0 0 0 0 54 0 0 0 0 0 0 0 55 0 0 0 0 0 0 0 53 0 0 0 0 0 0 0 0 56 0 0 0 0 0 57 0 0 0 0 0 0 0 57 0 58 0 0 59 0 0 0 60 61 33 62 63 60 61 33 0 0 0 0 0 0 64 0 0 0 0 0 0 0 0 0 0 0 0 65 66 67 0 68 69 0 0 + 0 0 0 0 0 0 0 0 0 0 0 70 71 72 73 74 75 0 76 73 73 0 0 0 0 6 6 6 6 6 6 6 6 6 77 6 6 6 6 6 78 6 79 6 6 79 80 6 81 6 6 6 82 83 84 6 85 86 87 88 89 90 91 0 92 93 94 95 0 0 0 0 0 96 97 98 99 100 101 102 102 103 104 105 0 106 0 0 0 107 0 108 109 110 0 111 112 112 0 113 0 0 0 114 0 0 0 115 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 116 117 102 102 102 118 116 116 119 0 120 0 0 0 0 0 0 121 0 0 0 0 0 122 0 0 0 0 0 0 0 0 0 123 0 0 0 0 0 0 0 0 0 0 0 0 0 0 124 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 125 0 0 0 0 0 57 102 102 102 102 102 102 102 102 102 102 102 102 102 126 0 0 127 0 0 128 129 130 131 132 0 133 129 130 131 132 0 134 0 0 0 135 102 102 102 102 136 137 0 0 0 0 0 0 102 136 102 102 138 139 116 140 116 116 116 116 141 116 + 116 140 142 142 142 142 142 143 102 144 142 142 142 142 142 142 102 145 0 0 0 0 0 0 0 0 0 146 0 0 0 0 0 0 0 0 0 0 0 0 0 147 0 0 0 0 0 0 0 148 0 0 0 0 0 149 0 0 0 0 0 0 0 0 0 0 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 21 0 0 0 0 0 81 150 151 6 6 6 81 6 6 6 6 6 6 78 0 0 152 153 154 155 156 157 158 158 159 158 160 161 0 162 163 164 165 165 165 165 165 165 166 167 167 168 169 169 169 170 171 172 165 173 174 175 0 176 177 178 179 180 167 181 182 0 0 183 0 184 0 185 186 187 188 189 190 191 192 193 194 194 195 196 197 198 198 198 198 198 199 200 200 200 200 201 202 203 204 0 0 0 0 0 0 0 0 0 0 205 206 0 0 0 0 0 0 0 207 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 46 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 208 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 104 0 0 0 0 0 0 0 0 0 207 209 0 0 0 0 210 14 0 0 0 211 211 211 211 211 212 211 211 211 213 214 215 216 211 211 211 217 218 211 219 220 221 211 211 211 211 211 211 211 211 211 211 211 211 211 211 211 211 211 211 211 211 222 211 211 211 211 211 211 211 211 211 211 211 211 211 211 211 211 211 223 211 211 211 216 211 224 225 226 227 228 229 230 231 232 231 0 0 0 0 233 102 234 142 142 0 235 0 0 236 0 0 0 0 0 0 237 142 142 238 239 240 0 0 0 0 0 0 0 0 0 0 6 81 0 0 0 0 0 0 0 0 0 0 0 0 0 0))) + +(define-deferred ucd-dt-table-4 + (vector->bytevector + '#(0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 2 0 3 0 0 0 0 2 0 0 3 3 2 2 0 0 2 3 3 0 4 4 4 0 5 5 5 5 5 5 0 5 5 5 5 5 5 5 5 5 0 5 5 5 5 5 5 0 0 5 5 5 5 5 0 0 0 5 5 5 5 5 5 0 0 5 5 5 5 5 0 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 0 0 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 0 0 5 5 5 5 5 5 5 5 5 0 2 2 5 5 5 5 0 5 5 5 5 5 5 2 2 0 0 5 5 5 5 5 5 2 0 0 5 5 5 5 5 5 0 0 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 2 5 5 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 0 5 5 5 5 5 5 0 0 5 5 5 5 5 5 5 5 5 5 5 2 2 2 5 5 0 0 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 0 0 5 5 0 0 0 0 0 0 5 5 5 5 5 + 5 5 5 5 5 5 5 5 5 0 0 0 0 0 0 0 0 0 0 0 0 3 3 3 3 3 3 3 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 0 0 3 3 3 3 3 0 0 0 0 0 0 0 0 0 0 0 5 5 0 5 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 2 0 0 0 5 0 0 0 0 0 2 5 5 5 5 5 5 0 5 0 5 5 0 0 0 0 0 0 0 0 0 0 5 5 5 5 5 5 0 0 0 0 0 0 0 0 0 0 5 5 5 5 5 0 2 2 2 5 5 2 2 0 0 0 0 0 0 0 0 0 2 2 2 0 2 2 0 0 0 2 0 0 0 0 0 0 5 5 0 5 0 0 0 5 0 0 0 0 5 5 5 0 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 5 5 0 0 0 0 0 0 0 0 0 5 5 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 5 5 0 0 5 5 0 0 5 5 5 5 5 5 0 0 5 5 5 5 5 5 0 0 5 5 5 5 5 5 5 5 5 5 5 5 0 0 5 5 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 5 5 5 5 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 + 2 2 2 0 0 0 0 0 0 0 5 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 5 5 5 5 5 5 0 0 0 0 0 0 0 0 0 0 0 5 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 0 5 0 0 0 5 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 5 0 0 5 0 0 0 0 0 0 0 0 0 5 0 0 5 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 5 0 0 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 0 0 5 0 0 0 0 0 0 5 5 0 5 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 5 5 5 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 5 0 0 0 0 5 0 0 0 0 5 0 0 0 0 5 0 0 0 0 + 0 0 5 0 5 5 2 5 2 0 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 5 0 5 0 5 0 5 0 5 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 5 0 0 5 5 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3 3 0 3 3 3 3 3 3 3 3 3 3 3 0 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 0 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 6 6 6 6 6 6 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3 3 3 3 5 5 5 5 5 5 5 5 5 5 2 5 0 0 0 0 5 5 5 5 5 5 5 5 5 5 0 0 0 0 0 0 5 5 5 5 5 5 0 0 5 5 5 5 5 5 0 0 5 5 5 5 5 5 5 5 0 5 0 5 0 5 0 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 0 0 5 5 5 5 5 0 5 5 5 5 5 5 + 5 2 5 2 2 5 5 5 5 0 5 5 5 5 5 5 5 5 5 5 5 5 5 5 0 0 5 5 5 5 5 5 0 5 5 5 0 0 5 5 5 0 5 5 5 5 5 5 5 5 2 0 5 5 2 2 2 2 2 1 2 2 2 0 0 0 0 0 0 1 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 0 0 0 0 0 0 0 0 1 0 0 0 2 2 0 2 2 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 2 3 3 0 0 3 3 3 3 3 3 3 3 3 3 3 3 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 0 6 6 6 6 6 6 6 6 6 6 6 6 6 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 2 2 7 2 0 2 2 2 0 2 7 7 7 7 7 7 7 7 7 7 0 7 2 0 0 7 7 7 7 7 0 0 3 2 3 0 7 0 5 0 7 0 5 5 7 7 0 7 7 7 0 7 7 2 2 2 2 7 0 2 7 7 7 7 7 0 0 0 0 7 7 7 7 7 0 0 0 0 0 0 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 0 0 0 0 0 0 + 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 5 0 0 0 0 5 0 0 0 0 5 0 0 5 0 0 0 0 0 0 0 5 0 5 0 0 0 0 0 2 2 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 0 5 0 0 5 0 5 0 0 0 0 0 0 5 0 5 0 0 0 0 0 0 0 0 0 0 5 5 5 5 5 0 0 5 5 0 0 5 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 5 5 5 5 5 5 0 0 0 0 0 0 5 5 5 5 0 0 0 0 0 0 0 0 0 0 0 5 5 0 0 0 0 0 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 3 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 5 0 5 0 5 0 5 0 5 0 5 0 5 0 5 0 5 0 5 0 5 0 0 5 0 5 0 5 0 0 0 0 0 0 5 5 0 5 5 0 5 5 0 5 5 0 5 5 0 0 0 0 0 0 5 0 0 0 0 0 0 2 2 0 5 10 0 0 0 0 5 0 0 5 5 5 5 0 0 0 5 10 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 0 0 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 2 2 2 8 8 8 8 0 0 0 0 0 0 0 0 11 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 0 2 2 2 2 2 2 2 2 2 2 2 2 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 2 2 2 2 2 2 2 2 2 11 11 11 11 11 11 11 11 11 11 11 11 + 11 11 11 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 11 0 0 0 0 0 0 0 0 0 0 0 0 3 3 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3 3 3 5 0 5 0 0 5 5 5 5 5 5 5 5 5 5 0 5 0 5 0 0 5 5 0 0 0 5 5 5 5 5 5 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 0 0 0 0 0 5 0 5 7 7 7 7 7 7 7 7 7 7 5 5 5 5 5 5 5 5 5 5 5 5 5 0 5 5 5 5 5 0 5 0 5 5 0 5 5 0 5 5 5 5 5 5 5 5 5 2 12 13 12 13 14 15 12 13 14 15 12 13 14 15 12 13 14 15 12 13 14 15 12 13 14 15 12 13 14 15 12 13 14 15 12 13 12 13 12 13 12 13 12 13 12 13 12 13 12 13 14 15 12 13 12 13 14 15 12 13 14 15 12 13 12 13 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12 13 14 15 12 13 12 13 12 13 12 12 13 12 13 12 13 12 13 14 15 14 15 12 13 12 13 12 13 12 13 12 13 12 13 12 13 14 12 13 14 12 13 14 15 12 12 12 12 12 12 12 12 + 12 12 12 12 12 12 12 12 12 12 12 12 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 12 12 12 12 12 12 12 12 12 12 12 12 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 14 14 14 14 14 14 14 15 15 15 15 15 15 15 15 13 12 0 0 14 13 14 14 14 14 14 14 13 14 13 13 14 14 13 13 14 14 13 14 13 14 13 13 14 13 13 14 13 14 13 13 14 13 14 14 13 13 13 14 13 13 13 13 13 14 13 13 13 13 13 14 13 13 14 13 14 14 14 13 14 14 14 14 0 0 14 14 14 14 13 13 14 13 13 13 13 14 13 13 13 13 13 13 14 14 13 13 14 13 14 13 13 13 13 13 13 13 13 14 14 14 13 13 0 0 0 0 0 0 0 0 12 12 12 12 12 12 12 12 12 12 12 12 12 0 0 0 10 10 10 10 10 10 10 10 10 10 0 0 0 0 0 0 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + 10 10 0 0 10 10 2 2 2 2 2 2 2 16 16 16 0 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 0 16 16 16 16 0 0 0 0 12 15 12 0 12 0 12 15 12 15 12 15 12 15 12 15 12 12 13 12 13 12 13 12 13 12 13 14 15 12 13 12 13 14 15 12 13 12 13 14 15 12 13 14 15 12 13 14 15 12 13 14 15 12 13 14 15 12 13 12 13 12 13 12 13 12 13 14 15 12 13 14 15 12 13 14 15 12 13 14 15 12 13 14 15 12 13 14 15 12 13 14 15 12 13 14 15 12 13 14 15 12 13 14 15 12 13 14 15 12 13 12 13 12 13 14 15 12 13 12 13 12 13 12 13 0 0 0 0 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 17 0 0 0 17 17 17 17 17 17 0 0 17 17 17 17 17 17 0 0 17 17 17 17 17 17 0 0 17 17 17 0 0 0 9 9 9 9 9 9 9 0 17 17 17 17 17 17 17 0 0 0 0 0 0 0 0 0 0 0 5 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 0 0 0 0 0 0 0 0 0 0 0 5 5 0 5 0 5 5 5 5 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 5 5 5 5 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 0 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 0 7 7 0 0 7 0 0 7 7 0 0 7 7 7 7 0 7 7 7 7 7 7 7 7 7 7 7 7 0 7 0 7 7 7 7 7 7 7 0 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 0 7 7 7 7 0 0 7 7 7 7 7 7 7 7 0 7 7 7 7 7 7 7 0 7 7 7 7 7 7 7 7 7 7 7 7 0 7 7 7 7 0 7 7 7 7 7 0 7 0 0 0 7 7 7 7 7 7 7 0 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 0 0 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 0 0 7 7 0 7 7 0 7 0 0 7 0 7 7 7 7 7 7 7 7 7 7 0 7 7 7 7 0 7 0 7 0 0 0 0 0 0 7 0 0 0 0 7 0 7 0 7 0 7 7 7 0 7 7 0 7 0 0 7 0 + 7 0 7 0 7 0 7 0 7 7 0 7 0 0 7 7 7 7 0 7 7 7 7 7 7 7 0 7 7 7 7 0 7 7 7 7 0 7 0 7 7 7 7 7 7 7 7 7 7 0 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 0 0 0 0 0 7 7 7 0 7 7 7 7 7 0 7 7 7 7 7 2 2 2 2 2 2 2 2 2 2 2 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 8 8 8 8 0 0 0 0 0 0 0 0 0 0 0 3 3 0 0 0 0 11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 11 11 0 0 0 0 0 0 0 0 0 0 0 0 0 11 11 11 11 11 11 11 11 11 11 11 11 0 0 0 0 2 2 2 2 2 2 2 2 2 0 0 0 0 0 0 0 8 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0))) + +(define-deferred ucd-dt-table-5 + #(#f no-break-version unspecified superscript vulgar-fraction canonical subscript font-variant encircled wide-compatibility vertical-layout-presentation cjk-squared-font-variant isolated-presentation final-presentation initial-presentation medial-presentation small-variant narrow-compatibility)) diff --git a/src/runtime/ustring.scm b/src/runtime/ustring.scm index 2e08dabf3..b4a39f397 100644 --- a/src/runtime/ustring.scm +++ b/src/runtime/ustring.scm @@ -543,7 +543,11 @@ USA. (builder (string-builder))) (do ((i 0 (fix:+ i 1))) ((not (fix:< i end))) - (builder (ucd-dm-value (string-ref string i)))) + (builder + (let ((char (string-ref string i))) + (if (eq? 'canonical (ucd-dt-value char)) + (ucd-dm-value char) + char)))) (builder))) (define (canonical-ordering! string) @@ -573,22 +577,6 @@ USA. (scan-for-non-starter 0)) string) - -(define (quick-check string qc-value) - (let ((n (string-length string))) - (let loop ((i 0) (last-ccc 0) (result #t)) - (if (fix:< i n) - (let* ((char (string-ref string i)) - (ccc (ucd-ccc-value char))) - (if (and (fix:> ccc 0) - (fix:< ccc last-ccc)) - #f - (let ((check (qc-value char))) - (and check - (if (eq? check 'maybe) - (loop (fix:+ i 1) ccc check) - (loop (fix:+ i 1) ccc result)))))) - result)))) ;;;; Grapheme clusters -- 2.25.1