;;;; UTF-{8,16,32} encoders
(define (char-utf8-byte-length char) ; number of bytes (1 to 4) CHAR occupies in UTF-8
- (let ((sv (unicode-char->scalar-value char 'char-utf8-byte-length)))
+ (let ((sv (char->scalar-value char 'char-utf8-byte-length))) ; the symbol is presumably the caller name for error reports -- confirm
(cond ((fix:< sv #x80) 1) ; below #x80: one byte
((fix:< sv #x800) 2) ; below #x800: two bytes
((fix:< sv #x10000) 3) ; below #x10000: three bytes
(else 4)))) ; anything larger: four bytes
(define (encode-utf8-char! bytes index char)
- (let ((sv (unicode-char->scalar-value char 'encode-utf8-char!)))
+ (let ((sv (char->scalar-value char 'encode-utf8-char!)))
(define-integrable (initial-byte leader offset)
(fix:or leader (fix:lsh sv offset)))
(fix:+ index 4)))))
(define (char-utf16-byte-length char) ; number of bytes CHAR occupies in UTF-16: 2 or 4
- (if (fix:< (unicode-char->scalar-value char 'char-utf16-byte-length) #x10000)
+ (if (fix:< (char->scalar-value char 'char-utf16-byte-length) #x10000) ; is the scalar value below #x10000?
2 ; yes: a single 16-bit unit
4)) ; no: two 16-bit units
(define (utf16-char-encoder setter caller)
(lambda (bytes index char)
- (let ((sv (unicode-char->scalar-value char caller)))
+ (let ((sv (char->scalar-value char caller)))
(cond ((fix:< sv #x10000)
(setter bytes index sv)
(fix:+ index 2))
(utf16-char-encoder bytevector-u16le-set! 'encode-utf16le-char!))
(define (char-utf32-byte-length char) ; always returns 4
- (unicode-char->scalar-value char 'char-utf32-byte-length)
+ (char->scalar-value char 'char-utf32-byte-length) ; result is discarded; presumably called only to validate CHAR -- confirm
4)
(define (utf32-char-encoder setter caller) ; builds a UTF-32 encoder around SETTER; CALLER is forwarded to the scalar-value conversion
(lambda (bytes index char) ; the encoder: stores CHAR's scalar value into BYTES at INDEX
- (setter bytes index (unicode-char->scalar-value char caller))))
+ (setter bytes index (char->scalar-value char caller)))) ; NOTE(review): returns SETTER's value, while the UTF-8 and UTF-16 encoders end in (fix:+ index N) -- confirm callers do not expect the next index
(define encode-utf32be-char! ; UTF-32 encoder built on bytevector-u32be-set!
(utf32-char-encoder bytevector-u32be-set! 'encode-utf32be-char!)) ; its own name is passed as the CALLER argument
\f
;;;; UTF-{8,16,32} decoders
-(define (initial-byte->utf8-char-length byte)
- (guarantee byte? byte 'initial-byte->utf8-char-length)
- (cond ((utf8-initial-byte-1? byte) 1)
- ((utf8-initial-byte-2? byte) 2)
- ((utf8-initial-byte-3? byte) 3)
- ((utf8-initial-byte-4? byte) 4)
- (else (error "Illegal UTF-8 initial byte:" byte))))
+(define (initial-byte->utf8-char-length b0) ; length (1 to 4) of the UTF-8 sequence that B0 starts
+ (guarantee u8? b0 'initial-byte->utf8-char-length) ; argument check; the replaced version used byte? here
+ (cond ((utf8-initial-byte-1? b0) 1) ; each clause pairs an initial-byte predicate with its sequence length
+ ((utf8-initial-byte-2? b0) 2)
+ ((utf8-initial-byte-3? b0) 3)
+ ((utf8-initial-byte-4? b0) 4)
+ (else (error "Illegal UTF-8 initial byte:" b0)))) ; none of the four predicates accepted B0
(define (decode-utf8-char bytes index)
(integer->char
(cond ((utf8-initial-byte-1? b0)
b0)
((utf8-initial-byte-2? b0)
- (decode-utf8-2 b0
- (bytevector-u8-ref bytes (fix:+ index 1))))
+ (let ((b1 (bytevector-u8-ref bytes (fix:+ index 1))))
+ (if (not (valid-utf8-sequence-2? b0 b1))
+ (error "Ill-formed UTF-8 sequence:" b0 b1))
+ (fix:or (extract-bits b0 #x1F 6)
+ (extract-bits b1 #x3F 0))))
((utf8-initial-byte-3? b0)
- (decode-utf8-3 b0
- (bytevector-u8-ref bytes (fix:+ index 1))
- (bytevector-u8-ref bytes (fix:+ index 2))))
+ (let ((b1 (bytevector-u8-ref bytes (fix:+ index 1)))
+ (b2 (bytevector-u8-ref bytes (fix:+ index 2))))
+ (if (not (valid-utf8-sequence-3? b0 b1 b2))
+ (error "Ill-formed UTF-8 sequence:" b0 b1 b2))
+ (fix:or (fix:or (extract-bits b0 #x0F 12)
+ (extract-bits b1 #x3F 6))
+ (extract-bits b2 #x3F 0))))
((utf8-initial-byte-4? b0)
- (decode-utf8-4 b0
- (bytevector-u8-ref bytes (fix:+ index 1))
- (bytevector-u8-ref bytes (fix:+ index 2))
- (bytevector-u8-ref bytes (fix:+ index 3))))
+ (let ((b1 (bytevector-u8-ref bytes (fix:+ index 1)))
+ (b2 (bytevector-u8-ref bytes (fix:+ index 2)))
+ (b3 (bytevector-u8-ref bytes (fix:+ index 3))))
+ (if (not (valid-utf8-sequence-4? b0 b1 b2 b3))
+ (error "Ill-formed UTF-8 sequence:" b0 b1 b2 b3))
+ (fix:or (fix:or (extract-bits b0 #x07 18)
+ (extract-bits b1 #x3F 12))
+ (fix:or (extract-bits b2 #x3F 6)
+ (extract-bits b3 #x3F 0)))))
(else
(error "Illegal UTF-8 initial byte:" b0))))))
-(define (decode-utf8-2 b0 b1)
- (if (not (and (fix:> b0 #xC1)
- (utf8-trailing-byte? b1)))
- (error "Ill-formed UTF-8 sequence:" b0 b1))
- (fix:or (extract-bits b0 #x1F 6)
- (extract-bits b1 #x3F 0)))
-
-(define (decode-utf8-3 b0 b1 b2)
- (if (not (and (or (fix:> b0 #xE0) (fix:> b1 #x9F))
- (utf8-trailing-byte? b1)
- (utf8-trailing-byte? b2)))
- (error "Ill-formed UTF-8 sequence:" b0 b1 b2))
- (let ((cp
- (fix:or (fix:or (extract-bits b0 #x0F 12)
- (extract-bits b1 #x3F 6))
- (extract-bits b2 #x3F 0))))
- (guarantee-cp-not-utf16-surrogate cp)
- (guarantee-cp-is-character cp)
- cp))
-
-(define (decode-utf8-4 b0 b1 b2 b3)
- (if (not (and (or (fix:> b0 #xF0) (fix:> b1 #x8F))
- (utf8-trailing-byte? b1)
- (utf8-trailing-byte? b2)
- (utf8-trailing-byte? b3)))
- (error "Ill-formed UTF-8 sequence:" b0 b1 b2 b3))
- (let ((cp
- (fix:or (fix:or (extract-bits b0 #x07 18)
- (extract-bits b1 #x3F 12))
- (fix:or (extract-bits b2 #x3F 6)
- (extract-bits b3 #x3F 0)))))
- (guarantee-cp-in-range cp)
- (guarantee-cp-is-character cp)
- cp))
-
(define-integrable (utf8-initial-byte-1? byte) ; true when bit #x80 of BYTE is clear
(fix:= #x00 (fix:and #x80 byte))) ; for a u8 argument that is the range #x00..#x7F
(define-integrable (utf8-initial-byte-2? byte) ; true when BYTE may start a two-byte sequence
- (fix:= #xC0 (fix:and #xE0 byte)))
+ (and (fix:>= byte #xC2) (fix:<= byte #xDF))) ; range #xC2..#xDF; the replaced mask test also accepted #xC0 and #xC1
(define-integrable (utf8-initial-byte-3? byte) ; true when BYTE may start a three-byte sequence
(fix:= #xE0 (fix:and #xF0 byte))) ; high four bits equal #xE, i.e. #xE0..#xEF for a u8 argument
(define-integrable (utf8-initial-byte-4? byte) ; true when BYTE may start a four-byte sequence
- (fix:= #xF0 (fix:and #xF8 byte)))
-
-(define-integrable (utf8-trailing-byte? byte)
+ (and (fix:>= byte #xF0) (fix:<= byte #xF4))) ; range #xF0..#xF4; the replaced mask test accepted up to #xF7
+\f
+;; code-point range b0
+;; ------------------ ------
+;; U+000000..U+00007F 00..7F
+(define-integrable (valid-utf8-sequence-1? b0) ; a one-byte sequence is valid exactly when B0 is a one-byte initial byte
+ (utf8-initial-byte-1? b0))
+
+;; code-point range b0 b1
+;; ------------------ ------ ------
+;; U+000080..U+0007FF C2..DF 80..BF
+(define-integrable (valid-utf8-sequence-2? b0 b1) ; true when B0 B1 match the table row above
+ (and (utf8-initial-byte-2? b0) ; B0 in C2..DF
+ (u8:80..BF? b1))) ; B1 in 80..BF
+
+;; code-point range b0 b1 b2
+;; ------------------ ------ ------ ------
+;; U+000800..U+000FFF E0 A0..BF 80..BF
+;; U+001000..U+00CFFF E1..EC 80..BF 80..BF
+;; U+00D000..U+00D7FF ED 80..9F 80..BF
+;; U+00E000..U+00FFFF EE..EF 80..BF 80..BF
+(define-integrable (valid-utf8-sequence-3? b0 b1 b2) ; true when B0 B1 B2 match one row of the table above
+ (and (utf8-initial-byte-3? b0) ; B0 in E0..EF
+ (cond ((fix:= b0 #xE0) (u8:A0..BF? b1)) ; E0 row: B1 limited to A0..BF
+ ((fix:< b0 #xED) (u8:80..BF? b1)) ; E1..EC row: any 80..BF
+ ((fix:= b0 #xED) (u8:80..9F? b1)) ; ED row: B1 limited to 80..9F, so the row ends at U+D7FF
+ (else (u8:80..BF? b1))) ; EE..EF row: any 80..BF
+ (u8:80..BF? b2))) ; B2 is 80..BF in every row
+
+;; code-point range b0 b1 b2 b3
+;; ------------------ ------ ------ ------ ------
+;; U+010000..U+03FFFF F0 90..BF 80..BF 80..BF
+;; U+040000..U+0FFFFF F1..F3 80..BF 80..BF 80..BF
+;; U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
+(define-integrable (valid-utf8-sequence-4? b0 b1 b2 b3) ; true when B0..B3 match one row of the table above
+ (and (utf8-initial-byte-4? b0) ; B0 in F0..F4
+ (cond ((fix:= b0 #xF0) (u8:90..BF? b1)) ; F0 row: B1 limited to 90..BF
+ ((fix:< b0 #xF4) (u8:80..BF? b1)) ; F1..F3 row: any 80..BF
+ (else (u8:80..8F? b1))) ; F4 row: B1 limited to 80..8F, so the row ends at U+10FFFF
+ (u8:80..BF? b2) ; B2 and B3 are 80..BF in every row
+ (u8:80..BF? b3)))
+
+;; Trailing bytes:
+
+(define-integrable (u8:80..8F? byte) ; true when BYTE is in #x80..#x8F
+ (fix:= #x80 (fix:and #xF0 byte))) ; the high four bits must equal #x8
+
+(define-integrable (u8:80..9F? byte) ; true when BYTE is in #x80..#x9F
+ (fix:= #x80 (fix:and #xE0 byte))) ; the high three bits must be 100
+
+(define-integrable (u8:80..BF? byte) ; true when BYTE is in #x80..#xBF; takes over the body of the removed utf8-trailing-byte?
(fix:= #x80 (fix:and #xC0 byte))) ; the high two bits must be 10
+
+(define-integrable (u8:90..BF? byte) ; true when BYTE is in #x90..#xBF
+ (and (fix:>= byte #x90) (fix:<= byte #xBF))) ; plain range comparison, both bounds inclusive
+
+(define-integrable (u8:A0..BF? byte) ; true when BYTE is in #xA0..#xBF
+ (and (fix:>= byte #xA0) (fix:<= byte #xBF))) ; plain range comparison, both bounds inclusive
\f
(define (initial-u16->utf16-char-length u16) ; byte length (2 or 4) of the UTF-16 sequence that U16 starts
(guarantee u16? u16 'initial-u16->utf16-char-length)
- (if (utf16-high-surrogate? u16) 4 2))
+ (if (utf16-low-surrogate? u16) ; new in this change: a low surrogate as the first unit is an error
+ (error "Illegal initial UTF-16 unit:" u16))
+ (if (utf16-high-surrogate? u16)
+ 4 ; high surrogate: four bytes
+ 2)) ; any other unit: two bytes
(define (utf16-char-decoder getter) ; builds a UTF-16 decoder that reads 16-bit units with GETTER
(lambda (bytes index) ; the decoder: returns the character encoded in BYTES at INDEX
- (let ((d0 (getter bytes index)))
- (if (utf16-low-surrogate? d0)
- (error "Ill-formed UTF-16 sequence:" d0))
- (let ((cp
- (if (utf16-high-surrogate? d0)
- (let ((d1 (getter bytes (fix:+ index 2))))
- (if (not (utf16-low-surrogate? d1))
- (error "Ill-formed UTF-16 sequence:" d0 d1))
- (fix:+ (fix:or (extract-bits d0 #x3FF 10)
- (extract-bits d1 #x3FF 0))
- #x10000))
- d0)))
- (guarantee-cp-in-range cp)
- (guarantee-cp-is-character cp)
- (integer->char cp)))))
+ (integer->char ; the decoded value goes straight to integer->char; the guarantee-cp-* calls are gone
+ (let ((d0 (getter bytes index))) ; first unit
+ (if (utf16-low-surrogate? d0) ; a sequence may not start with a low surrogate
+ (error "Illegal initial UTF-16 unit:" d0))
+ (if (utf16-high-surrogate? d0)
+ (let ((d1 (getter bytes (fix:+ index 2)))) ; second unit, two bytes after the first
+ (if (not (utf16-low-surrogate? d1)) ; a high surrogate must be followed by a low surrogate
+ (error "Ill-formed UTF-16 sequence:" d0 d1))
+ (fix:+ (fix:or (extract-bits d0 #x3FF 10) ; OR together the fields extracted from both units
+ (extract-bits d1 #x3FF 0))
+ #x10000)) ; then add #x10000
+ d0))))) ; not a surrogate: the unit itself is the value
(define decode-utf16be-char ; UTF-16 decoder built on bytevector-u16be-ref
(utf16-char-decoder bytevector-u16be-ref))
(utf16-char-decoder bytevector-u16le-ref))
(define (initial-u32->utf32-char-length u32) ; always returns 4 once U32 passes the argument check
- (guarantee u32? u32 'initial-u32->utf32-char-length)
+ (guarantee unicode-scalar-value? u32 'initial-u32->utf32-char-length) ; check changed from u32? to unicode-scalar-value?
4)
(define (utf32-char-decoder getter) ; builds a UTF-32 decoder that reads one 32-bit unit with GETTER
(lambda (bytes index) ; the decoder: returns the character encoded in BYTES at INDEX
(let ((u32 (getter bytes index)))
- (if (not (< u32 char-code-limit))
- (error "Value is not a code point:" u32))
- (guarantee-cp-not-utf16-surrogate u32)
- (guarantee-cp-is-character u32)
+ (guarantee unicode-scalar-value? u32 'utf32-char-decoder) ; one check replaces the three removed ones; the caller name reported is this factory's, not the decoder's
(integer->char u32))))
(define decode-utf32be-char
named-chars)))
(define ascii-chars ; the 128 characters #x00 through #x7F in code order; control characters now use #\xNN notation instead of #\u+NN
- '(#\u+00 #\u+01 #\u+02 #\u+03 #\u+04 #\u+05 #\u+06 #\u+07
- #\u+08 #\u+09 #\u+0A #\u+0B #\u+0C #\u+0D #\u+0E #\u+0F
- #\u+10 #\u+11 #\u+12 #\u+13 #\u+14 #\u+15 #\u+16 #\u+17
- #\u+18 #\u+19 #\u+1A #\u+1B #\u+1C #\u+1D #\u+1E #\u+1F
- #\u+20 #\! #\" #\# #\$ #\% #\& #\' #\( #\) #\* #\+ #\, #\- #\. #\/
+ '(#\x00 #\x01 #\x02 #\x03 #\x04 #\x05 #\x06 #\x07
+ #\x08 #\x09 #\x0A #\x0B #\x0C #\x0D #\x0E #\x0F
+ #\x10 #\x11 #\x12 #\x13 #\x14 #\x15 #\x16 #\x17
+ #\x18 #\x19 #\x1A #\x1B #\x1C #\x1D #\x1E #\x1F
+ #\x20 #\! #\" #\# #\$ #\% #\& #\' #\( #\) #\* #\+ #\, #\- #\. #\/
#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\: #\; #\< #\= #\> #\? #\@
#\A #\B #\C #\D #\E #\F #\G #\H #\I #\J #\K #\L #\M #\N #\O #\P #\Q
#\R #\S #\T #\U #\V #\W #\X #\Y #\Z #\[ #\\ #\] #\^ #\_ #\`
#\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l #\m #\n #\o #\p #\q
- #\r #\s #\t #\u #\v #\w #\x #\y #\z #\{ #\| #\} #\~ #\u+7F))
+ #\r #\s #\t #\u #\v #\w #\x #\y #\z #\{ #\| #\} #\~ #\x7F))
(define-test 'basic-ascii
(lambda ()
ascii-chars)
;; 2.1 First possible sequence of a certain length
- (#u8(#x00) #\u+00000000)
- (#u8(#xC2 #x80) #\u+00000080)
- (#u8(#xE0 #xA0 #x80) #\u+00000800)
- (#u8(#xF0 #x90 #x80 #x80) #\u+00010000)
+ (#u8(#x00) #\x00000000)
+ (#u8(#xC2 #x80) #\x00000080)
+ (#u8(#xE0 #xA0 #x80) #\x00000800)
+ (#u8(#xF0 #x90 #x80 #x80) #\x00010000)
;; 2.2 Last possible sequence of a certain length
- (#u8(#x7F) #\u+0000007F)
- (#u8(#xDF #xBF) #\u+000007FF)
- (#u8(#xEF #xBF #xBD) #\u+0000FFFD)
+ (#u8(#x7F) #\x0000007F)
+ (#u8(#xDF #xBF) #\x000007FF)
+ (#u8(#xEF #xBF #xBD) #\x0000FFFD)
+ (#u8(#xEF #xBF #xBF) #\x0000FFFF)
;; 2.3 Other boundary conditions
- (#u8(#xED #x9F #xBF) #\u+0000D7FF)
- (#u8(#xEE #x80 #x80) #\u+0000E000)
- (#u8(#xEF #xBF #xBD) #\u+0000FFFD)
- (#u8(#xF4 #x8F #xBF #xBD) #\u+0010FFFD)
+ (#u8(#xED #x9F #xBF) #\x0000D7FF)
+ (#u8(#xEE #x80 #x80) #\x0000E000)
+ (#u8(#xEF #xBF #xBD) #\x0000FFFD)
+ (#u8(#xF4 #x8F #xBF #xBD) #\x0010FFFD)
+ (#u8(#xF4 #x8F #xBF #xBF) #\x0010FFFF)
+
+ ;; 5.3 Noncharacter code positions
+ ;; Particularly problematic noncharacters in 16-bit applications:
+ (#u8(#xEF #xBF #xBE) #\xFFFE)
+ (#u8(#xEF #xBF #xBF) #\xFFFF)
))
+
+(define-test 'utf8-initial-byte ; checks initial-byte->utf8-char-length against every byte value 0..#xFF
+ (lambda ()
+ (for-each (lambda (b)
+ (if (memv b invalid-utf8-initial-bytes)
+ (assert-error ; listed as invalid: the call must signal an error
+ (lambda () (initial-byte->utf8-char-length b)))
+ (assert-= (cond ((< b #x80) 1) ; otherwise the expected length follows from the range B falls in
+ ((< b #xE0) 2)
+ ((< b #xF0) 3)
+ (else 4))
+ (initial-byte->utf8-char-length b))))
+ (iota #x100)))) ; the integers 0 through #xFF
+
+(define invalid-utf8-initial-bytes ; byte values that no utf8-initial-byte-N? predicate accepts
+ (append (iota (- #xc2 #x80) #x80) ; #x80 through #xC1
+ (iota (- #x100 #xf5) #xF5))) ; #xF5 through #xFF
\f
(define-test 'invalid-known-length-utf8-sequences ; each entry pairs a byte sequence with an expected length; decoding it must fail
(lambda ()
(for-each (lambda (entry)
(let ((bytes (car entry)) ; the byte sequence under test
(length (cadr entry))) ; the length its first byte should report
- (assert-= length
- (initial-byte->utf8-char-length
- (bytevector-u8-ref bytes 0)))
+ (let ((b0 (bytevector-u8-ref bytes 0)))
+ (if (not (memv b0 invalid-utf8-initial-bytes)) ; skip the length check when the first byte is itself invalid
+ (assert-= length
+ (initial-byte->utf8-char-length b0))))
(assert-error (lambda () (decode-utf8-char bytes 0))))) ; decoding must signal an error for every entry
invalid-known-length-sequences)))
,@(map (lambda (bytes)
(list bytes (+ (bytevector-length bytes) 1)))
'(
- #u8(#xC0) ; #\u+0000
- #u8(#xE0 #x80) ; #\u+0000
- #u8(#xF0 #x80 #x80) ; #\u+0000
- #u8(#xDF) ; #\u+000007FF
- #u8(#xEF #xBF) ; #\u+0000FFFF
- #u8(#xF7 #xBF #xBF) ; #\u+001FFFFF
+ #u8(#xC0) ; #\x0000
+ #u8(#xE0 #x80) ; #\x0000
+ #u8(#xF0 #x80 #x80) ; #\x0000
+ #u8(#xDF) ; #\x000007FF
+ #u8(#xEF #xBF) ; #\x0000FFFF
+ #u8(#xF7 #xBF #xBF) ; #\x001FFFFF
))
))
-(define-test 'illegal-chars
+(define-test 'utf16-surrogates
(lambda ()
(for-each (lambda (cp)
+ (value-assert unicode-code-point? "code point" cp)
(value-assert (lambda (cp)
(not (unicode-scalar-value? cp)))
"non-scalar value"
- cp))
- utf16-surrogates)
- (for-each (lambda (cp)
- (value-assert unicode-scalar-value? "scalar value" cp))
- illegal-characters)
- (for-each (lambda (cp)
- (value-assert unicode-code-point? "code point" cp)
+ cp)
(let ((char (integer->char cp)))
(value-assert (lambda (char)
(not (unicode-char? char)))
(assert-error
(lambda ()
(encode-utf8-char! (make-bytevector 16) 0 char)))))
- (append utf16-surrogates illegal-characters))))
+ utf16-surrogates)))
(define utf16-surrogates ; the #x800 consecutive integers starting at #xD800, i.e. #xD800 through #xDFFF
(iota #x800 #xD800))
+(define-test 'illegal-chars ; checks each entry of illegal-characters
+ (lambda ()
+ (for-each (lambda (cp)
+ (value-assert unicode-code-point? "code point" cp) ; must be a code point
+ (value-assert unicode-scalar-value? "scalar value" cp) ; and a scalar value
+ (let ((char (integer->char cp)))
+ (value-assert (lambda (char)
+ (not (unicode-char? char))) ; yet the resulting char must not satisfy unicode-char?
+ "non-unicode character"
+ char)
+ (encode-utf8-char! (make-bytevector 16) 0 char))) ; no assert-error wrapper here, unlike the surrogate test; presumably encoding is expected to succeed -- confirm
+ illegal-characters)))
+
(define illegal-characters
`(
;; Other noncharacters:
(define invalid-utf8-sequences
`(
- ;; 2.2 Last possible sequence of a certain length
- #u8(#xEF #xBF #xBF) ; #\u+0000FFFF
- #u8(#xF7 #xBF #xBF #xBF) ; #\u+001FFFFF
-
;; 2.3 Other boundary conditions
- #u8(#xF4 #x8F #xBF #xBF) ; #\u+0010FFFF
- #u8(#xF4 #x90 #x80 #x80) ; #\u+00110000
+ #u8(#xF4 #x90 #x80 #x80) ; #\x00110000
+ #u8(#xF7 #xBF #xBF #xBF) ; #\x001FFFFF
;; 3.1 Unexpected continuation bytes
;; (duplicated below)
(iota #x02 #xFC))
;; 3.3 Sequences with last continuation byte missing
- #u8(#xF8 #x80 #x80 #x80) ; #\u+0000
- #u8(#xFC #x80 #x80 #x80 #x80) ; #\u+0000
- #u8(#xFB #xBF #xBF #xBF) ; #\u+03FFFFFF
- #u8(#xFD #xBF #xBF #xBF #xBF) ; #\u+7FFFFFFF
+ #u8(#xF8 #x80 #x80 #x80) ; #\x0000
+ #u8(#xFC #x80 #x80 #x80 #x80) ; #\x0000
+ #u8(#xFB #xBF #xBF #xBF) ; #\x03FFFFFF
+ #u8(#xFD #xBF #xBF #xBF #xBF) ; #\x7FFFFFFF
;; 4.1 Examples of an overlong ASCII character
- #u8(#xC0 #xAF) ; #\u+002F
- #u8(#xE0 #x80 #xAF) ; #\u+002F
- #u8(#xF0 #x80 #x80 #xAF) ; #\u+002F
- #u8(#xF8 #x80 #x80 #x80 #xAF) ; #\u+002F
- #u8(#xFC #x80 #x80 #x80 #x80 #xAF) ; #\u+002F
+ #u8(#xC0 #xAF) ; #\x002F
+ #u8(#xE0 #x80 #xAF) ; #\x002F
+ #u8(#xF0 #x80 #x80 #xAF) ; #\x002F
+ #u8(#xF8 #x80 #x80 #x80 #xAF) ; #\x002F
+ #u8(#xFC #x80 #x80 #x80 #x80 #xAF) ; #\x002F
;; 4.2 Maximum overlong sequences
- #u8(#xC1 #xBF) ; #\u+0000007F
- #u8(#xE0 #x9F #xBF) ; #\u+000007FF
- #u8(#xF0 #x8F #xBF #xBF) ; #\u+0000FFFF
- #u8(#xF8 #x87 #xBF #xBF #xBF) ; #\u+001FFFFF
- #u8(#xFC #x83 #xBF #xBF #xBF #xBF) ; #\u+03FFFFFF
+ #u8(#xC1 #xBF) ; #\x0000007F
+ #u8(#xE0 #x9F #xBF) ; #\x000007FF
+ #u8(#xF0 #x8F #xBF #xBF) ; #\x0000FFFF
+ #u8(#xF8 #x87 #xBF #xBF #xBF) ; #\x001FFFFF
+ #u8(#xFC #x83 #xBF #xBF #xBF #xBF) ; #\x03FFFFFF
;; 4.3 Overlong representation of the NUL character
- #u8(#xC0 #x80) ; #\u+0000
- #u8(#xE0 #x80 #x80) ; #\u+0000
- #u8(#xF0 #x80 #x80 #x80) ; #\u+0000
- #u8(#xF8 #x80 #x80 #x80 #x80) ; #\u+0000
- #u8(#xFC #x80 #x80 #x80 #x80 #x80) ; #\u+0000
+ #u8(#xC0 #x80) ; #\x0000
+ #u8(#xE0 #x80 #x80) ; #\x0000
+ #u8(#xF0 #x80 #x80 #x80) ; #\x0000
+ #u8(#xF8 #x80 #x80 #x80 #x80) ; #\x0000
+ #u8(#xFC #x80 #x80 #x80 #x80 #x80) ; #\x0000
;; 3.5 Impossible bytes
#u8(#xFE)
#u8(#xFE #xFE #xFF #xFF)
;; 5.1 Single UTF-16 surrogates
- #u8(#xED #xA0 #x80) ; #\u+D800
- #u8(#xED #xAD #xBF) ; #\u+DB7F
- #u8(#xED #xAE #x80) ; #\u+DB80
- #u8(#xED #xAF #xBF) ; #\u+DBFF
- #u8(#xED #xB0 #x80) ; #\u+DC00
- #u8(#xED #xBE #x80) ; #\u+DF80
- #u8(#xED #xBF #xBF) ; #\u+DFFF
+ #u8(#xED #xA0 #x80) ; #\xD800
+ #u8(#xED #xAD #xBF) ; #\xDB7F
+ #u8(#xED #xAE #x80) ; #\xDB80
+ #u8(#xED #xAF #xBF) ; #\xDBFF
+ #u8(#xED #xB0 #x80) ; #\xDC00
+ #u8(#xED #xBE #x80) ; #\xDF80
+ #u8(#xED #xBF #xBF) ; #\xDFFF
;; 5.2 Paired UTF-16 surrogates
- ;; (#\u+D800 #\u+DC00 #u8(#xED #xA0 #x80 #xED #xB0 #x80))
- ;; (#\u+D800 #\u+DFFF #u8(#xED #xA0 #x80 #xED #xBF #xBF))
- ;; (#\u+DB7F #\u+DC00 #u8(#xED #xAD #xBF #xED #xB0 #x80))
- ;; (#\u+DB7F #\u+DFFF #u8(#xED #xAD #xBF #xED #xBF #xBF))
- ;; (#\u+DB80 #\u+DC00 #u8(#xED #xAE #x80 #xED #xB0 #x80))
- ;; (#\u+DB80 #\u+DFFF #u8(#xED #xAE #x80 #xED #xBF #xBF))
- ;; (#\u+DBFF #\u+DC00 #u8(#xED #xAF #xBF #xED #xB0 #x80))
- ;; (#\u+DBFF #\u+DFFF #u8(#xED #xAF #xBF #xED #xBF #xBF))
-
- ;; 5.3 Noncharacter code positions
- ;; Particularly problematic noncharacters in 16-bit applications:
- #u8(#xEF #xBF #xBE) ; #\u+FFFE
- #u8(#xEF #xBF #xBF) ; #\u+FFFF
+ ;; (#\xD800 #\xDC00 #u8(#xED #xA0 #x80 #xED #xB0 #x80))
+ ;; (#\xD800 #\xDFFF #u8(#xED #xA0 #x80 #xED #xBF #xBF))
+ ;; (#\xDB7F #\xDC00 #u8(#xED #xAD #xBF #xED #xB0 #x80))
+ ;; (#\xDB7F #\xDFFF #u8(#xED #xAD #xBF #xED #xBF #xBF))
+ ;; (#\xDB80 #\xDC00 #u8(#xED #xAE #x80 #xED #xB0 #x80))
+ ;; (#\xDB80 #\xDFFF #u8(#xED #xAE #x80 #xED #xBF #xBF))
+ ;; (#\xDBFF #\xDC00 #u8(#xED #xAF #xBF #xED #xB0 #x80))
+ ;; (#\xDBFF #\xDFFF #u8(#xED #xAF #xBF #xED #xBF #xBF))
))
\ No newline at end of file