(define (chars->ascii chars)
(map char->ascii chars))
\f
-(define (char-utf8-byte-length char)
- (%sv-utf8-byte-length (unicode-char->scalar-value char)))
+;;;; UTF-{8,16,32} encoders
-(define (char-utf8-bytes char)
+(define (char-utf8-byte-length char)
(let ((sv (unicode-char->scalar-value char)))
- (let ((bytes (make-bytevector (%sv-utf8-byte-length sv))))
- (%sv-utf8-bytes! bytes 0 sv)
- bytes)))
+ (cond ((fix:< sv #x00000080) 1)
+ ((fix:< sv #x00000800) 2)
+ ((fix:< sv #x00010000) 3)
+ (else 4))))
(define (char-utf8-bytes! bytes index char)
- (%sv-utf8-bytes! bytes index (unicode-char->scalar-value char)))
-
-(define (%sv-utf8-byte-length sv)
- (cond ((fix:< sv #x00000080) 1)
- ((fix:< sv #x00000800) 2)
- ((fix:< sv #x00010000) 3)
- (else 4)))
-
-(define (%sv-utf8-bytes! bytes index sv)
-
- (define-integrable (initial-byte n-bits offset)
- (fix:or (fix:and (fix:lsh #xFF (fix:+ n-bits 1)) #xFF)
- (fix:lsh sv (fix:- 0 offset))))
-
- (define-integrable (trailing-byte offset)
- (fix:or #x80 (fix:and (fix:lsh sv (fix:- 0 offset)) #x3F)))
-
- (cond ((fix:< sv #x00000080)
- (bytevector-u8-set! bytes index sv)
- (fix:+ index 1))
- ((fix:< sv #x00000800)
- (bytevector-u8-set! bytes index (initial-byte 5 6))
- (bytevector-u8-set! bytes (fix:+ index 1) (trailing-byte 0))
- (fix:+ index 2))
- ((fix:< sv #x00010000)
- (bytevector-u8-set! bytes index (initial-byte 4 12))
- (bytevector-u8-set! bytes (fix:+ index 1) (trailing-byte 6))
- (bytevector-u8-set! bytes (fix:+ index 2) (trailing-byte 0))
- (fix:+ index 3))
- (else
- (bytevector-u8-set! bytes index (initial-byte 3 18))
- (bytevector-u8-set! bytes (fix:+ index 1) (trailing-byte 12))
- (bytevector-u8-set! bytes (fix:+ index 2) (trailing-byte 6))
- (bytevector-u8-set! bytes (fix:+ index 3) (trailing-byte 0))
- (fix:+ index 4))))
+ (let ((sv (unicode-char->scalar-value char)))
+
+ (define-integrable (initial-byte n-bits offset)
+ (fix:or (fix:and (fix:lsh #xFF (fix:+ n-bits 1)) #xFF)
+ (fix:lsh sv (fix:- 0 offset))))
+
+ (define-integrable (trailing-byte offset)
+ (fix:or #x80 (fix:and (fix:lsh sv (fix:- 0 offset)) #x3F)))
+
+ (cond ((fix:< sv #x00000080)
+ (bytevector-u8-set! bytes index sv)
+ (fix:+ index 1))
+ ((fix:< sv #x00000800)
+ (bytevector-u8-set! bytes index (initial-byte 5 6))
+ (bytevector-u8-set! bytes (fix:+ index 1) (trailing-byte 0))
+ (fix:+ index 2))
+ ((fix:< sv #x00010000)
+ (bytevector-u8-set! bytes index (initial-byte 4 12))
+ (bytevector-u8-set! bytes (fix:+ index 1) (trailing-byte 6))
+ (bytevector-u8-set! bytes (fix:+ index 2) (trailing-byte 0))
+ (fix:+ index 3))
+ (else
+ (bytevector-u8-set! bytes index (initial-byte 3 18))
+ (bytevector-u8-set! bytes (fix:+ index 1) (trailing-byte 12))
+ (bytevector-u8-set! bytes (fix:+ index 2) (trailing-byte 6))
+ (bytevector-u8-set! bytes (fix:+ index 3) (trailing-byte 0))
+ (fix:+ index 4)))))
+
+(define (char-utf16-byte-length char)
+ (if (fix:< (unicode-char->scalar-value char) #x00010000) 2 4))
+
+(define (char-utf16be-bytes! bytes index char)
+ (let ((sv (unicode-char->scalar-value char)))
+ (cond ((fix:< sv #x10000)
+ (bytevector-u16be-set! bytes index sv)
+ (fix:+ index 2))
+ (else
+ (receive (h l) (split-into-utf16-surrogates sv)
+ (bytevector-u16be-set! bytes index h)
+ (bytevector-u16be-set! bytes (fix:+ index 1) l))
+ (fix:+ index 4)))))
+
+(define (char-utf16le-bytes! bytes index char)
+ (let ((sv (unicode-char->scalar-value char)))
+ (cond ((fix:< sv #x10000)
+ (bytevector-u16le-set! bytes index sv)
+ (fix:+ index 2))
+ (else
+ (receive (h l) (split-into-utf16-surrogates sv)
+ (bytevector-u16le-set! bytes index l)
+ (bytevector-u16le-set! bytes (fix:+ index 1) h))
+ (fix:+ index 4)))))
+
+(define (char-utf32-byte-length char)
+ (if (fix:< (unicode-char->scalar-value char) #x00010000) 2 4))
+
+(define (char-utf32be-bytes! bytes index char)
+ (bytevector-u32be-set! bytes index (unicode-char->scalar-value char)))
+
+(define (char-utf32le-bytes! bytes index char)
+ (bytevector-u32le-set! bytes index (unicode-char->scalar-value char)))
\f
(define (char=? x y)
(fix:= (char->integer x) (char->integer y)))