From b86c245fefb37b76cdc9d686781114852e05a12d Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Wed, 15 Feb 2017 01:23:32 -0800 Subject: [PATCH] Account for the fact that UCD procedure accept all code points. --- src/runtime/char.scm | 121 +++++++++++------------------ src/runtime/predicate-metadata.scm | 1 + src/runtime/runtime.pkg | 39 +++++----- src/runtime/ucd-glue.scm | 4 +- 4 files changed, 69 insertions(+), 96 deletions(-) diff --git a/src/runtime/char.scm b/src/runtime/char.scm index 2b7195024..4ddd4a9a6 100644 --- a/src/runtime/char.scm +++ b/src/runtime/char.scm @@ -48,10 +48,6 @@ USA. (define-integrable (%make-char code bits) (integer->char (fix:or (fix:lsh bits 21) code))) -(define (code->char code) - (guarantee-limited-index-fixnum code char-code-limit 'CODE->CHAR) - (integer->char code)) - (define (char-code char) (fix:and (char->integer char) #x1FFFFF)) @@ -106,19 +102,19 @@ USA. (fix:>= (char->integer x) (char->integer y))) (define (char-ci=? x y) - (char=? (ucd-scf-value x) (ucd-scf-value y))) + (char=? (char-foldcase x) (char-foldcase y))) (define (char-ci? x y) - (char>? (ucd-scf-value x) (ucd-scf-value y))) + (char>? (char-foldcase x) (char-foldcase y))) (define (char-ci>=? x y) - (char>=? (ucd-scf-value x) (ucd-scf-value y))) + (char>=? (char-foldcase x) (char-foldcase y))) (define (char=-predicate char) (guarantee char? char 'char=-predicate) @@ -129,49 +125,30 @@ USA. (guarantee char? char 'char-ci=-predicate) (lambda (char*) (char-ci=? char* char))) - -(define (char-mapper mapper) - (lambda (char) - (if (fix:= 0 (char-bits char)) - (mapper char) - (%make-char (mapper (%make-char (char-code char) 0)) - (char-bits char))))) - -(define char-downcase - (char-mapper - (lambda (char) - (guarantee unicode-char? char 'char-downcase) - (ucd-slc-value char)))) - -(define char-foldcase - (char-mapper - (lambda (char) - (guarantee unicode-char? char 'char-foldcase) - (ucd-scf-value char)))) - -(define char-upcase - (char-mapper - (lambda (char) - (guarantee unicode-char? char 'char-upcase) - (ucd-suc-value char)))) - -(define char-downcase-full - (char-mapper - (lambda (char) - (guarantee unicode-char? char 'char-downcase-full) - (ucd-lc-value char)))) - -(define char-foldcase-full - (char-mapper - (lambda (char) - (guarantee unicode-char? char 'char-foldcase-full) - (ucd-cf-value char)))) - -(define char-upcase-full - (char-mapper - (lambda (char) - (guarantee unicode-char? char 'char-upcase-full) - (ucd-uc-value char)))) + +(define char-downcase) +(define char-foldcase) +(define char-upcase) +(define char-downcase-full) +(define char-foldcase-full) +(define char-upcase-full) +(add-boot-init! + (lambda () + + (define (char-mapper mapper) + (lambda (char) + (if (fix:= 0 (char-bits char)) + (mapper char) + (%make-char (mapper (%make-char (char-code char) 0)) + (char-bits char))))) + + (set! char-downcase (char-mapper ucd-slc-value)) + (set! char-foldcase (char-mapper ucd-scf-value)) + (set! char-upcase (char-mapper ucd-suc-value)) + (set! char-downcase-full (char-mapper ucd-lc-value)) + (set! char-foldcase-full (char-mapper ucd-cf-value)) + (set! char-upcase-full (char-mapper ucd-uc-value)) + unspecific)) (define (digit-value char) (and (char-numeric? char) @@ -373,46 +350,42 @@ USA. (and (char? object) (unicode-char-code? (char->integer object)))) +(define (base-char? object) + (and (char? object) + (unicode-code-point? (char->integer object)))) + (define (unicode-char-code? object) (and (unicode-scalar-value? object) (not (non-character? object)))) -(define-integrable (unicode-code-point? object) - (and (index-fixnum? object) - (fix:< object char-code-limit))) - (define (unicode-scalar-value? object) (and (unicode-code-point? object) (not (utf16-surrogate? object)))) +(define-integrable (unicode-code-point? object) + (and (index-fixnum? object) + (fix:< object char-code-limit))) + (define-guarantee unicode-char "a Unicode character") (define-guarantee unicode-scalar-value "a Unicode scalar value") +(define (char->code-point char #!optional caller) + (let ((n (char->integer char))) + (guarantee unicode-code-point? n caller) + n)) + (define (char->scalar-value char #!optional caller) (let ((n (char->integer char))) (guarantee unicode-scalar-value? n caller) n)) -(define (unicode-char->scalar-value char #!optional caller) - (guarantee unicode-char? char caller) - (char->integer char)) - -(define (unicode-scalar-value->char sv #!optional caller) - (guarantee unicode-scalar-value? sv caller) - (integer->char sv)) - (define (char-general-category char) - (guarantee unicode-char? char 'char-general-category) - (%char-general-category char)) - -(define (unicode-code-point-general-category cp) - (guarantee unicode-code-point? cp 'unicode-code-point-general-category) - (%char-general-category (integer->char cp))) + (guarantee base-char? char 'char-general-category) + (ucd-gc-value char)) -(define-integrable (%char-general-category char) - (let ((value (ucd-gc-value char))) - (and (symbol? value) - value))) +(define (code-point-general-category cp) + (guarantee unicode-code-point? cp 'code-point-general-category) + (ucd-gc-value (integer->char cp))) (define-integrable (utf16-surrogate? cp) (fix:= #xD800 (fix:and #xF800 cp))) diff --git a/src/runtime/predicate-metadata.scm b/src/runtime/predicate-metadata.scm index 89070c697..086a9c6cd 100644 --- a/src/runtime/predicate-metadata.scm +++ b/src/runtime/predicate-metadata.scm @@ -289,6 +289,7 @@ USA. ;; MIT/GNU Scheme: misc (register-predicate! 8-bit-char? '8-bit-char '<= char?) (register-predicate! ascii-char? 'ascii-char '<= 8-bit-char?) + (register-predicate! base-char? 'base-char '<= char?) (register-predicate! bit-string? 'bit-string) (register-predicate! cell? 'cell) (register-predicate! code-point-list? 'code-point-list '<= list?) diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index 4d366a67c..338a21614 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -1320,6 +1320,7 @@ USA. (parent (runtime)) (export () ;; BEGIN deprecated bindings + (code->char integer->char) (error:not-wide-char error:not-unicode-char) (guarantee-wide-char guarantee-unicode-char) (wide-char? unicode-char?) @@ -1334,6 +1335,7 @@ USA. ;; END deprecated bindings 8-bit-char? ascii-char? + base-char? char-8-bit? char->digit char->integer @@ -1361,9 +1363,6 @@ USA. char-general-category char-integer-limit char-upcase - char-utf16-byte-length - char-utf32-byte-length - char-utf8-byte-length char<=? char? char? clear-char-bits - code->char - decode-utf16be-char - decode-utf16le-char - decode-utf32be-char - decode-utf32le-char + code-point-general-category decode-utf8-char digit->char digit-value - encode-utf16be-char! - encode-utf16le-char! - encode-utf32be-char! - encode-utf32le-char! - encode-utf8-char! - initial-byte->utf8-char-length - initial-u16->utf16-char-length - initial-u32->utf32-char-length integer->char make-char name->char radix? set-char-bits - unicode-char->scalar-value unicode-char-code? unicode-char? - unicode-code-point-general-category unicode-code-point? - unicode-scalar-value->char unicode-scalar-value?) (export (runtime) char-downcase-full char-foldcase-full - char-upcase-full)) + char-upcase-full + char-utf16-byte-length + char-utf32-byte-length + char-utf8-byte-length + decode-utf16be-char + decode-utf16le-char + decode-utf32be-char + decode-utf32le-char + encode-utf16be-char! + encode-utf16le-char! + encode-utf32be-char! + encode-utf32le-char! + encode-utf8-char! + initial-byte->utf8-char-length + initial-u16->utf16-char-length + initial-u32->utf32-char-length)) (define-package (runtime ucd-tables) (files "ucd-table-alpha" diff --git a/src/runtime/ucd-glue.scm b/src/runtime/ucd-glue.scm index ac1ffb43e..8bfbd86f0 100644 --- a/src/runtime/ucd-glue.scm +++ b/src/runtime/ucd-glue.scm @@ -69,7 +69,7 @@ USA. ((#x22 #x23 #x27 #x2c #x3b #x5c #x60 #x7c) #f) ((#x200C #x200D) #t) (else - (case (unicode-code-point-general-category sv) + (case (code-point-general-category sv) ((letter:uppercase letter:lowercase letter:titlecase @@ -110,7 +110,7 @@ USA. (define-deferred char-set:normal-printing (compute-char-set (lambda (sv) - (case (unicode-code-point-general-category sv) + (case (code-point-general-category sv) ((letter:uppercase letter:lowercase letter:titlecase -- 2.25.1