From 2604f97f60f53851d3a255e4835c603fee1e4c87 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Wed, 15 Feb 2017 01:27:38 -0800 Subject: [PATCH] Change character sets to be defined over code points. --- src/runtime/chrset.scm | 31 ++++++++++++++----------------- src/runtime/runtime.pkg | 2 +- tests/runtime/test-char-set.scm | 2 +- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/src/runtime/chrset.scm b/src/runtime/chrset.scm index 923e388e9..56251effa 100644 --- a/src/runtime/chrset.scm +++ b/src/runtime/chrset.scm @@ -38,9 +38,6 @@ USA. ;;; ;;; The HIGH range sequence is implemented as a u32 bytevector of alternating ;;; START and END points. The vector always has an even number of points. -;;; -;;; For simplicity, character sets are allowed to contain any code point. -;;; However, CHAR-IN-SET? only accepts Unicode characters. (define-record-type (%make-char-set low high) @@ -287,7 +284,7 @@ USA. (define (%cpl-element->ranges elt) (cond ((%range? elt) (list elt)) - ((unicode-char? elt) (list (char->integer elt))) + ((base-char? elt) (list (char->integer elt))) ((ustring? elt) (map char->integer (ustring->list elt))) (else #f))) @@ -339,7 +336,7 @@ USA. (define (cpl-element? object) (or (%range? object) - (unicode-char? object) + (base-char? object) (ustring? object) (char-set? object))) @@ -386,23 +383,23 @@ USA. ;;;; Accessors (define (char-in-set? char char-set) - (guarantee unicode-char? char 'char-in-set?) - (%scalar-value-in-char-set? (char->integer char) char-set)) + (guarantee base-char? char 'char-in-set?) + (%code-point-in-char-set? (char->integer char) char-set)) -(define (scalar-value-in-char-set? sv char-set) - (guarantee unicode-scalar-value? sv 'scalar-value-in-char-set?) - (%scalar-value-in-char-set? sv char-set)) +(define (code-point-in-char-set? cp char-set) + (guarantee unicode-code-point? cp 'code-point-in-char-set?) + (%code-point-in-char-set? cp char-set)) -(define (%scalar-value-in-char-set? sv char-set) - (if (fix:< sv (%low-limit (%char-set-low char-set))) - (%low-ref (%char-set-low char-set) sv) +(define (%code-point-in-char-set? cp char-set) + (if (fix:< cp (%low-limit (%char-set-low char-set))) + (%low-ref (%char-set-low char-set) cp) (let ((high (%char-set-high char-set))) (let loop ((lower 0) (upper (%high-length high))) (if (fix:< lower upper) (let ((i (fix:* 2 (fix:quotient (fix:+ lower upper) 4)))) - (cond ((fix:< sv (%high-ref high i)) + (cond ((fix:< cp (%high-ref high i)) (loop lower i)) - ((fix:>= sv (%high-ref high (fix:+ i 1))) + ((fix:>= cp (%high-ref high (fix:+ i 1))) (loop (fix:+ i 2) upper)) (else #t))) #f))))) @@ -545,7 +542,7 @@ USA. (define (char-set-members char-set) (let loop ((cp 0)) (if (fix:< cp #x80) - (if (%scalar-value-in-char-set? cp char-set) + (if (%code-point-in-char-set? cp char-set) (cons (integer->char cp) (loop (fix:+ cp 1))) (loop (fix:+ cp 1))) @@ -567,7 +564,7 @@ USA. (do ((cp 0 (fix:+ cp 1))) ((not (fix:< cp #x100))) (vector-8b-set! table cp - (if (%scalar-value-in-char-set? cp char-set) 1 0))) + (if (%code-point-in-char-set? cp char-set) 1 0))) table)) (define (8-bit-char-set? char-set) diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index 338a21614..6eff7498f 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -1515,8 +1515,8 @@ USA. char-wsp? code-point-list? char-set* + code-point-in-char-set? compute-char-set - scalar-value-in-char-set? string->char-set) (export (runtime string) (char-set-table %char-set-table))) diff --git a/tests/runtime/test-char-set.scm b/tests/runtime/test-char-set.scm index 83d294bf5..2dcfa7232 100644 --- a/tests/runtime/test-char-set.scm +++ b/tests/runtime/test-char-set.scm @@ -87,7 +87,7 @@ USA. (char-in-set? (integer->char value) (char-set* svl)) (named-call 'SVL-MEMBER? svl-member? svl value)) (assert-boolean= - (scalar-value-in-char-set? value (char-set* svl)) + (code-point-in-char-set? value (char-set* svl)) (named-call 'SVL-MEMBER? svl-member? svl value))) 'EXPRESSION `(CHAR-IN-SET? ,value ,svl))) (enumerate-test-values))) -- 2.25.1