(define-integrable char-code-limit #x110000)
(define-integrable char-bits-limit #x10)
-(define-integrable char-integer-limit #x2000000)
(define-guarantee char "character")
(define (char-bits char)
(fix:lsh (char->integer char) -21))
+(define (bitless-char? object)
+ (and (char? object)
+ (fix:< (char->integer object) char-code-limit)))
+
(define (char-bits-set? bits char)
(guarantee-limited-index-fixnum bits char-bits-limit 'CHAR-BITS-SET?)
(fix:= bits (fix:and (char-bits char) bits)))
(define (unicode-char? object)
(and (char? object)
- (unicode-char-code? (char->integer object))))
-
-(define (base-char? object)
- (and (char? object)
- (unicode-code-point? (char->integer object))))
-
-(define (unicode-char-code? object)
- (and (unicode-scalar-value? object)
- (not (non-character? object))))
+ (let ((cp (char->integer object)))
+ (and (fix:< object char-code-limit)
+ (not (utf16-surrogate? object))))))
(define (unicode-scalar-value? object)
(and (unicode-code-point? object)
(not (utf16-surrogate? object))))
-(define-integrable (unicode-code-point? object)
+(define (unicode-code-point? object)
(and (index-fixnum? object)
(fix:< object char-code-limit)))
(define-guarantee unicode-char "a Unicode character")
(define-guarantee unicode-scalar-value "a Unicode scalar value")
-(define (char->code-point char #!optional caller)
- (let ((n (char->integer char)))
- (guarantee unicode-code-point? n caller)
- n))
-
-(define (char->scalar-value char #!optional caller)
+(define (%char->scalar-value char #!optional caller)
(let ((n (char->integer char)))
(guarantee unicode-scalar-value? n caller)
n))
(define (char-general-category char)
- (guarantee base-char? char 'char-general-category)
+ (guarantee bitless-char? char 'char-general-category)
(ucd-gc-value char))
(define (code-point-general-category cp)
;;;; UTF-{8,16,32} encoders
(define (char-utf8-byte-length char)
- (let ((sv (char->scalar-value char 'char-utf8-byte-length)))
+ (let ((sv (%char->scalar-value char 'char-utf8-byte-length)))
(cond ((fix:< sv #x80) 1)
((fix:< sv #x800) 2)
((fix:< sv #x10000) 3)
(else 4))))
(define (encode-utf8-char! bytes index char)
- (let ((sv (char->scalar-value char 'encode-utf8-char!)))
+ (let ((sv (%char->scalar-value char 'encode-utf8-char!)))
(define-integrable (initial-byte leader offset)
(fix:or leader (fix:lsh sv offset)))
(fix:+ index 4)))))
(define (char-utf16-byte-length char)
- (if (fix:< (char->scalar-value char 'char-utf16-byte-length) #x10000)
+ (if (fix:< (%char->scalar-value char 'char-utf16-byte-length) #x10000)
2
4))
(define (utf16-char-encoder setter caller)
(lambda (bytes index char)
- (let ((sv (char->scalar-value char caller)))
+ (let ((sv (%char->scalar-value char caller)))
(cond ((fix:< sv #x10000)
(setter bytes index sv)
(fix:+ index 2))
(utf16-char-encoder bytevector-u16le-set! 'encode-utf16le-char!))
(define (char-utf32-byte-length char)
- (char->scalar-value char 'char-utf32-byte-length)
+ (%char->scalar-value char 'char-utf32-byte-length)
4)
(define (utf32-char-encoder setter caller)
(lambda (bytes index char)
- (setter bytes index (char->scalar-value char caller))))
+ (setter bytes index (%char->scalar-value char caller))))
(define encode-utf32be-char!
(utf32-char-encoder bytevector-u32be-set! 'encode-utf32be-char!))
(define (%cpl-element->ranges elt)
(cond ((%range? elt) (list elt))
- ((base-char? elt) (list (char->integer elt)))
+ ((bitless-char? elt) (list (char->integer elt)))
((ustring? elt) (map char->integer (ustring->list elt)))
(else #f)))
(define (cpl-element? object)
(or (%range? object)
- (base-char? object)
+ (bitless-char? object)
(ustring? object)
(char-set? object)))
;;;; Accessors
(define (char-in-set? char char-set)
- (guarantee base-char? char 'char-in-set?)
+ (guarantee bitless-char? char 'char-in-set?)
(%code-point-in-char-set? (char->integer char) char-set))
(define (code-point-in-char-set? cp char-set)
(define char-set:not-graphic)
(define char-set:not-standard)
(define char-set:standard)
-(define char-set:unicode)
(define char-set:wsp)
(define char-standard?)
(define char-wsp?)
(add-boot-init!
(lambda ()
- (set! char-set:unicode (compute-char-set unicode-char-code?))
-
(set! char-set:graphic (%signal->char-set '(#x20 #x7F #xA0 #x100)))
(set! char-set:not-graphic (char-set-invert char-set:graphic))
(set! char-graphic? (char-set-predicate char-set:graphic))
;; MIT/GNU Scheme: misc
(register-predicate! 8-bit-char? '8-bit-char '<= char?)
(register-predicate! ascii-char? 'ascii-char '<= 8-bit-char?)
- (register-predicate! base-char? 'base-char '<= char?)
(register-predicate! bit-string? 'bit-string)
+ (register-predicate! bitless-char? 'bitless-char '<= char?)
(register-predicate! cell? 'cell)
(register-predicate! code-point-list? 'code-point-list '<= list?)
(register-predicate! compiled-code-address? 'compiled-code-address)
(register-predicate! stack-address? 'stack-address)
(register-predicate! thread-mutex? 'thread-mutex)
(register-predicate! undefined-value? 'undefined-value)
- (register-predicate! unicode-char? 'unicode-char '<= base-char?)
+ (register-predicate! unicode-char? 'unicode-char '<= bitless-char?)
(register-predicate! unicode-code-point? 'unicode-code-point
'<= index-fixnum?)
(register-predicate! unicode-scalar-value? 'unicode-scalar-value
'<= unicode-code-point?)
- (register-predicate! unicode-char-code? 'unicode-char-code
- '<= unicode-scalar-value?)
(register-predicate! uninterned-symbol? 'uninterned-symbol '<= symbol?)
(register-predicate! weak-list? 'weak-list)
(register-predicate! weak-pair? 'weak-pair)