#| -*-Scheme-*-
-$Id: runtime.pkg,v 14.655 2008/07/27 04:24:13 cph Exp $
+$Id: runtime.pkg,v 14.656 2008/08/18 00:12:49 cph Exp $
Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994,
1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
char-in-alphabet?
char-set->alphabet
code-points->alphabet
+ combine-utf16-surrogates
error:not-8-bit-alphabet
error:not-alphabet
error:not-unicode-code-point
error:not-utf16-be-string
+ error:not-utf16-high-surrogate
error:not-utf16-le-string
+ error:not-utf16-low-surrogate
error:not-utf16-string
error:not-utf32-be-string
error:not-utf32-le-string
guarantee-alphabet
guarantee-unicode-code-point
guarantee-utf16-be-string
+ guarantee-utf16-high-surrogate
guarantee-utf16-le-string
+ guarantee-utf16-low-surrogate
guarantee-utf16-string
guarantee-utf32-be-string
guarantee-utf32-le-string
open-utf32-output-string
open-utf8-input-string
open-utf8-output-string
+ split-into-utf16-surrogates
string->alphabet
string->utf16-be-string
string->utf16-le-string
utf16-be-string-length
utf16-be-string-valid?
utf16-be-string?
+ utf16-high-surrogate?
utf16-le-string->wide-string
utf16-le-string-length
utf16-le-string-valid?
utf16-le-string?
+ utf16-low-surrogate?
utf16-string->wide-string
utf16-string-length
utf16-string-valid?
#| -*-Scheme-*-
-$Id: unicode.scm,v 1.41 2008/08/17 09:42:29 cph Exp $
+$Id: unicode.scm,v 1.42 2008/08/18 00:12:53 cph Exp $
Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994,
1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
(if (fix:< start end)
(and (fix:<= (fix:+ start 2) end)
(let ((d0 (combiner (n 0) (n 1))))
- (if (high-surrogate? d0)
+ (if (utf16-high-surrogate? d0)
(and (fix:<= (fix:+ start 4) end)
- (low-surrogate? (combiner (n 2) (n 3)))
+ (utf16-low-surrogate? (combiner (n 2) (n 3)))
(fix:+ start 4))
(and (legal-code-16? d0)
(fix:+ start 2)))))
(define (le-octets->digit16 b0 b1)
(fix:or (fix:lsh b1 8) b0))
-(define-integrable (high-surrogate? n)
- (fix:= #xD800 (fix:and #xFC00 n)))
-
-(define-integrable (low-surrogate? n)
- (fix:= #xDC00 (fix:and #xFC00 n)))
-
-(define-integrable (combine-surrogates n0 n1)
- (fix:+ (fix:+ (fix:lsh (fix:and n0 #x3FF) 10)
- (fix:and n1 #x3FF))
+(define (combine-utf16-surrogates h l)
+ (guarantee-utf16-high-surrogate h 'combine-utf16-surrogates)
+ (guarantee-utf16-low-surrogate l 'combine-utf16-surrogates)
+ (fix:+ (fix:+ (fix:lsh (fix:and h #x3FF) 10)
+ (fix:and l #x3FF))
#x10000))
+(define (split-into-utf16-surrogates n)
+ (guarantee-unicode-code-point n 'split-into-utf16-surrogates)
+ (let ((n (fix:- n #x10000)))
+ (values (fix:or (fix:and (fix:lsh n -10) #x03FF) #xD800)
+ (fix:or (fix:and n #x03FF) #xDC00))))
+
(define (utf16-string? object)
(and (string? object)
(utf16-string-valid? object)))
(and (string? object)
(utf16-le-string-valid? object)))
+(define (utf16-high-surrogate? n)
+ (and (index-fixnum? n)
+ (fix:= #xD800 (fix:and #xFC00 n))))
+
+(define (utf16-low-surrogate? n)
+ (and (index-fixnum? n)
+ (fix:= #xDC00 (fix:and #xFC00 n))))
+
(define-guarantee utf16-string "UTF-16 string")
(define-guarantee utf16-be-string "UTF-16BE string")
(define-guarantee utf16-le-string "UTF-16LE string")
+(define-guarantee utf16-high-surrogate "UTF-16 high surrogate")
+(define-guarantee utf16-low-surrogate "UTF-16 low surrogate")
\f
;;;; UTF-8 representation