From 29c4cb4893ce7428d4d762e433ec4cc3ee527f2f Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Mon, 23 Feb 2004 20:49:32 +0000 Subject: [PATCH] Add support for UTF-32. --- v7/src/runtime/genio.scm | 56 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/v7/src/runtime/genio.scm b/v7/src/runtime/genio.scm index 392ecbc5d..bc6b8c077 100644 --- a/v7/src/runtime/genio.scm +++ b/v7/src/runtime/genio.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: genio.scm,v 1.23 2004/02/17 04:59:29 cph Exp $ +$Id: genio.scm,v 1.24 2004/02/23 20:49:32 cph Exp $ Copyright 1991,1993,1995,1996,1999,2002 Massachusetts Institute of Technology Copyright 2003,2004 Massachusetts Institute of Technology @@ -1267,6 +1267,60 @@ USA. (extract n1 #x3FF 0)) #x10000)) +(define-decoder 'UTF-32-BE + (lambda (ib) + (let ((bv (input-buffer-bytes ib)) + (bs (input-buffer-start ib))) + (and (fix:<= (fix:+ bs 4) (input-buffer-end ib)) + (let ((cp + (+ (* (get-byte bv bs 0) #x1000000) + (* (get-byte bv bs 1) #x10000) + (* (get-byte bv bs 2) #x100) + (get-byte bv bs 3)))) + (if (unicode-code-point? cp) + (begin + (set-input-buffer-start! ib (fix:+ bs 4)) + cp) + (error:char-decoding ib))))))) + +(define-decoder 'UTF-32-LE + (lambda (ib) + (let ((bv (input-buffer-bytes ib)) + (bs (input-buffer-start ib))) + (and (fix:<= (fix:+ bs 4) (input-buffer-end ib)) + (let ((cp + (+ (* (get-byte bv bs 3) #x1000000) + (* (get-byte bv bs 2) #x10000) + (* (get-byte bv bs 1) #x100) + (get-byte bv bs 0)))) + (if (unicode-code-point? cp) + (begin + (set-input-buffer-start! ib (fix:+ bs 4)) + cp) + (error:char-decoding ib))))))) + +(define-encoder 'UTF-32-BE + (lambda (ob cp) + (if (fix:< cp #x110000) + (let ((bv (output-buffer-bytes ob)) + (bs (output-buffer-start ob))) + (put-byte bv bs 0 #x00) + (put-byte bv bs 1 (fix:and (fix:lsh cp #x10) #xFF)) + (put-byte bv bs 2 (fix:and (fix:lsh cp #x08) #xFF)) + (put-byte bv bs 3 (fix:and cp #xFF))) + (error:char-encoding ob cp)))) + +(define-encoder 'UTF-32-LE + (lambda (ob cp) + (if (fix:< cp #x110000) + (let ((bv (output-buffer-bytes ob)) + (bs (output-buffer-start ob))) + (put-byte bv bs 0 (fix:and cp #xFF)) + (put-byte bv bs 1 (fix:and (fix:lsh cp #x08) #xFF)) + (put-byte bv bs 2 (fix:and (fix:lsh cp #x10) #xFF)) + (put-byte bv bs 3 #x00)) + (error:char-encoding ob cp)))) + ;;;; Normalizers (define-normalizer 'NEWLINE -- 2.25.1