From 2f1f479db44649b07917fcaed727668211e11115 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Sun, 29 Jan 2017 18:40:53 -0800 Subject: [PATCH] Refactor the character set abstraction: * Clarify the use of "code point" versus "scalar value". * Rename well-formed-scalar-value-list? to code-point-list? and broaden its scope to allow characters, strings, and character sets. * Rename scalar-values->char-set to char-set* and broaden its domain to include any code-point-list?. * Rename char-set->scalar-values to char-set->code-points. * Implement char-in-set? which is char-member? with the args reversed. This makes it consistent with scalar-value-in-char-set?. Deprecate char-member?. * Implement char-set-union* and char-set-intersection*. * Eliminate all of the "alphabet" names which are obsolete. * Eliminate guarantee-char-set and error:not-char-set. --- src/runtime/chrset.scm | 251 ++++++++++++++--------------- src/runtime/parse.scm | 6 +- src/runtime/predicate-metadata.scm | 5 +- src/runtime/regsexp.scm | 19 +-- src/runtime/rgxcmp.scm | 2 +- src/runtime/runtime.pkg | 51 ++---- src/runtime/string.scm | 14 +- src/xml/turtle.scm | 6 +- src/xml/xml-chars.scm | 12 +- tests/runtime/test-char-set.scm | 14 +- tests/runtime/test-regsexp.scm | 8 +- 11 files changed, 167 insertions(+), 221 deletions(-) diff --git a/src/runtime/chrset.scm b/src/runtime/chrset.scm index 945c920d2..22053ee4c 100644 --- a/src/runtime/chrset.scm +++ b/src/runtime/chrset.scm @@ -29,35 +29,26 @@ USA. (declare (usual-integrations)) -;;; The character set is stored in two parts. The LOW part is a -;;; bit-vector encoding of the scalar values below %LOW-LIMIT. The -;;; HIGH part is a sequence of scalar-value ranges, each of which has -;;; an inclusive START and an exclusive END. The ranges in the -;;; sequence are all disjoint from one another, and no two ranges are -;;; adjacent. These ranges are sorted so that their STARTs are in -;;; order. +;;; The character set is stored in two parts. The LOW part is a bit-vector +;;; encoding of the code points below %LOW-LIMIT. The HIGH part is a sequence +;;; of code-point ranges, each of which has an inclusive START and an +;;; exclusive END. The ranges in the sequence are all disjoint from one +;;; another, and no two ranges are adjacent. These ranges are sorted so that +;;; their STARTs are in order. ;;; -;;; The HIGH range sequence is implemented as a vector of alternating -;;; START and END points. The vector always has an even number of -;;; points. +;;; The HIGH range sequence is implemented as a vector of alternating START and +;;; END points. The vector always has an even number of points. ;;; -;;; For simplicity, character sets are allowed to contain ranges that -;;; contain illegal scalar values. However, CHAR-SET-MEMBER? doesn't -;;; accept illegal characters. - -(define-structure (char-set (type-descriptor ) - (constructor %%make-char-set) - (conc-name %char-set-)) - (low #f read-only #t) - (high #f read-only #t) +;;; For simplicity, character sets are allowed to contain any code point. +;;; However, CHAR-SET-MEMBER? only accepts scalar values. + +(define-record-type + (%%make-char-set low high table) + char-set? + (low %char-set-low) + (high %char-set-high) ;; Backwards compatibility: - (table #f read-only #t)) - -(define-guarantee char-set "character set") - -(define (guarantee-char-sets char-sets #!optional caller) - (for-each (lambda (char-set) (guarantee-char-set char-set caller)) - char-sets)) + (table %char-set-table)) (define (%make-char-set low high) (%%make-char-set low high @@ -98,22 +89,23 @@ USA. (define-guarantee 8-bit-char-set "an 8-bit char-set") -;;;; Conversion to and from scalar-values list +;;;; Code-point lists -(define (well-formed-scalar-value-list? ranges) - (list-of-type? ranges well-formed-scalar-value-range?)) +(define (code-point-list? object) + (list-of-type? object cpl-element?)) -(define (well-formed-scalar-value-range? range) - (if (pair? range) - (and (index-fixnum? (car range)) - (index-fixnum? (cdr range)) - (fix:<= (car range) (cdr range)) - (fix:<= (cdr range) char-code-limit)) - (and (index-fixnum? range) - (fix:< range char-code-limit)))) +(define (cpl-element? object) + (or (%range? object) + (unicode-char? object) + (ustring? object) + (char-set? object))) -(define-guarantee well-formed-scalar-value-list "a Unicode scalar-value list") -(define-guarantee well-formed-scalar-value-range "a Unicode scalar-value range") +(define (%range? object) + (or (and (pair? object) + (unicode-code-point? (car object)) + (unicode-code-point? (cdr object)) + (fix:<= (car object) (cdr object))) + (unicode-code-point? object))) (define (%make-range start end) (if (fix:= (fix:- end start) 1) @@ -130,13 +122,15 @@ USA. (cdr range) (fix:+ range 1))) -(define (char-set->scalar-values char-set) - (guarantee-char-set char-set 'CHAR-SET->SCALAR-VALUES) +;;;; Convert char-set to code-point list + +(define (char-set->code-points char-set) + (guarantee char-set? char-set 'char-set->code-points) (reverse! - (%high->scalar-values (%char-set-high char-set) - (%low->scalar-values (%char-set-low char-set))))) + (%high->code-points (%char-set-high char-set) + (%low->code-points (%char-set-low char-set))))) -(define (%low->scalar-values low) +(define (%low->code-points low) (define (find-start i result) (if (fix:< i %low-limit) @@ -155,7 +149,7 @@ USA. (find-start 0 '())) -(define (%high->scalar-values high result) +(define (%high->code-points high result) (let ((n (vector-length high))) (define (loop i result) (if (fix:< i n) @@ -174,18 +168,45 @@ USA. (vector-ref high 1)) (cdr result))) (loop 0 result)))) + +;;;; General char-set constructor -(define (scalar-values->char-set ranges) - (guarantee-well-formed-scalar-value-list ranges 'SCALAR-VALUES->CHAR-SET) - (%scalar-values->char-set ranges)) - -(define (%scalar-values->char-set ranges) +(define (char-set . chars) + (char-set* chars)) + +(define (char-set* cpl) + (guarantee-list-of cpl-element? cpl 'char-set*) + (char-set-union* (%cpl->char-sets cpl))) + +(define (%cpl->char-sets cpl) + (let loop ((cpl cpl) (ranges '()) (char-sets '())) + (cond ((not (pair? cpl)) + (cons (%ranges->char-set ranges) char-sets)) + ((%cpl-element->ranges (car cpl)) + => (lambda (ranges*) + (loop (cdr cpl) + (append ranges* ranges) + char-sets))) + ((char-set? (car cpl)) + (loop (cdr cpl) + ranges + (cons (car cpl) char-sets))) + (else + (error:not-a cpl-element? (car cpl)))))) + +(define (%cpl-element->ranges elt) + (cond ((%range? elt) (list elt)) + ((unicode-char? elt) (list (char->integer elt))) + ((ustring? elt) (map char->integer (ustring->list elt))) + (else #f))) + +(define (%ranges->char-set ranges) (receive (low-ranges high-ranges) - (%split-ranges (%canonicalize-scalar-value-list ranges)) - (%make-char-set (%scalar-values->low low-ranges) - (%scalar-values->high high-ranges)))) + (%split-ranges (%canonicalize-ranges ranges)) + (%make-char-set (%code-points->low low-ranges) + (%code-points->high high-ranges)))) -(define (%scalar-values->low ranges) +(define (%code-points->low ranges) (let ((low (%make-low 0))) (for-each (lambda (range) (let ((end (%range-end range))) @@ -195,7 +216,7 @@ USA. ranges) low)) -(define (%scalar-values->high ranges) +(define (%code-points->high ranges) (let ((high (make-vector (fix:* 2 (length ranges))))) (do ((ranges ranges (cdr ranges)) (i 0 (fix:+ i 2))) @@ -204,9 +225,8 @@ USA. (vector-set! high (fix:+ i 1) (%range-end (car ranges)))) high)) -(define (%canonicalize-scalar-value-list ranges) - ;; Sort ranges in order, delete empty ranges, then merge adjacent - ;; ranges. +(define (%canonicalize-ranges ranges) + ;; Sorts ranges in order, deletes empty ranges, then merges adjacent ranges. (let ((ranges (filter! (lambda (range) (fix:< (%range-start range) @@ -264,38 +284,38 @@ USA. ;;;; Predicates -(define (char-set-member? char-set char) - (guarantee-char-set char-set 'CHAR-SET-MEMBER?) - (guarantee-char char 'CHAR-SET-MEMBER?) - (%scalar-value-in-char-set? (char-code char) char-set)) - -(define (scalar-value-in-char-set? scalar-value char-set) - (guarantee-unicode-scalar-value scalar-value 'SCALAR-VALUE-IN-CHAR-SET?) - (guarantee-char-set char-set 'SCALAR-VALUE-IN-CHAR-SET?) - (%scalar-value-in-char-set? scalar-value char-set)) - -(define (%scalar-value-in-char-set? value char-set) - (if (fix:< value %low-limit) - (%low-ref (%char-set-low char-set) value) +(define (char-in-set? char char-set) + (guarantee unicode-char? char 'char-in-set?) + (guarantee char-set? char-set 'char-in-set?) + (%scalar-value-in-char-set? (char->integer char) char-set)) + +(define (scalar-value-in-char-set? sv char-set) + (guarantee unicode-scalar-value? sv 'scalar-value-in-char-set?) + (guarantee char-set? char-set 'scalar-value-in-char-set?) + (%scalar-value-in-char-set? sv char-set)) + +(define (%scalar-value-in-char-set? sv char-set) + (if (fix:< sv %low-limit) + (%low-ref (%char-set-low char-set) sv) (let ((high (%char-set-high char-set))) (let loop ((lower 0) (upper (vector-length high))) (if (fix:< lower upper) (let ((i (fix:* 2 (fix:quotient (fix:+ lower upper) 4)))) - (cond ((fix:< value (vector-ref high i)) + (cond ((fix:< sv (vector-ref high i)) (loop lower i)) - ((fix:>= value (vector-ref high (fix:+ i 1))) + ((fix:>= sv (vector-ref high (fix:+ i 1))) (loop (fix:+ i 2) upper)) (else #t))) #f))))) (define (char-set-predicate char-set) - (guarantee-char-set char-set 'CHAR-SET-PREDICATE) + (guarantee char-set? char-set 'CHAR-SET-PREDICATE) (lambda (char) (char-set-member? char-set char))) (define (char-set=? char-set . char-sets) - (guarantee-char-set char-set 'CHAR-SET=?) - (guarantee-char-sets char-sets 'CHAR-SET=?) + (guarantee char-set? char-set 'CHAR-SET=?) + (guarantee-list-of char-set? char-sets 'CHAR-SET=?) (every (lambda (char-set*) (%=? char-set* char-set)) char-sets)) @@ -323,7 +343,7 @@ USA. ;;;; Mapping operations (define (char-set-invert char-set) - (guarantee-char-set char-set 'CHAR-SET-INVERT) + (guarantee char-set? char-set 'CHAR-SET-INVERT) (%invert char-set)) (define (%invert cs1) @@ -365,7 +385,10 @@ USA. (vector %low-limit char-code-limit)))) (define (char-set-union . char-sets) - (guarantee-char-sets char-sets 'CHAR-SET-UNION) + (char-set-union* char-sets)) + +(define (char-set-union* char-sets) + (guarantee-list-of char-set? char-sets 'char-set-union*) (reduce %union %null-char-set char-sets)) (define (%union cs1 cs2) @@ -375,7 +398,10 @@ USA. cs2)) (define (char-set-intersection . char-sets) - (guarantee-char-sets char-sets 'CHAR-SET-INTERSECTION) + (char-set-intersection* char-sets)) + +(define (char-set-intersection* char-sets) + (guarantee-list-of char-set? char-sets 'char-set-intersection*) (reduce %intersection %null-char-set char-sets)) (define (%intersection cs1 cs2) @@ -385,8 +411,8 @@ USA. cs2)) (define (char-set-difference char-set . char-sets) - (guarantee-char-set char-set 'CHAR-SET-DIFFERENCE) - (guarantee-char-sets char-sets 'CHAR-SET-DIFFERENCE) + (guarantee char-set? char-set 'char-set-difference) + (guarantee-list-of char-set? char-sets 'char-set-difference) (fold-left %difference char-set char-sets)) (define (%difference cs1 cs2) @@ -489,21 +515,21 @@ USA. ;;;; Standard character sets (define-deferred char-set:upper-case - (scalar-values->char-set '((#x41 . #x5B) (#xC0 . #xD7) (#xD8 . #xDE)))) + (char-set* '((#x41 . #x5B) (#xC0 . #xD7) (#xD8 . #xDE)))) (define-deferred char-set:not-upper-case (char-set-invert char-set:upper-case)) (define-deferred char-upper-case? (char-set-predicate char-set:upper-case)) (define-deferred char-set:lower-case - (scalar-values->char-set '((#x61 . #x7B) (#xE0 . #xF7) (#xF8 . #xFF)))) + (char-set* '((#x61 . #x7B) (#xE0 . #xF7) (#xF8 . #xFF)))) (define-deferred char-set:not-lower-case (char-set-invert char-set:lower-case)) (define-deferred char-lower-case? (char-set-predicate char-set:lower-case)) -(define-deferred char-set:numeric (scalar-values->char-set '((#x30 . #x3A)))) +(define-deferred char-set:numeric (char-set* '((#x30 . #x3A)))) (define-deferred char-set:not-numeric (char-set-invert char-set:numeric)) (define-deferred char-numeric? (char-set-predicate char-set:numeric)) (define-deferred char-set:graphic - (scalar-values->char-set '((#x20 . #x7F) (#xA0 . #x100)))) + (char-set* '((#x20 . #x7F) (#xA0 . #x100)))) (define-deferred char-set:not-graphic (char-set-invert char-set:graphic)) (define-deferred char-graphic? (char-set-predicate char-set:graphic)) @@ -534,11 +560,9 @@ USA. ;;; Used in RFCs: -(define-deferred char-set:ascii - (scalar-values->char-set '((#x00 . #x80)))) +(define-deferred char-set:ascii (char-set* '((#x00 . #x80)))) -(define-deferred char-set:ctls - (scalar-values->char-set '((#x00 . #x20) #x7F))) +(define-deferred char-set:ctls (char-set* '((#x00 . #x20) #x7F))) (define-deferred char-ctl? (char-set-predicate char-set:ctls)) (define-deferred char-set:wsp (char-set #\space #\tab)) @@ -546,8 +570,11 @@ USA. ;;;; Backwards compatibility +(define (char-set-member? char-set char) + (char-in-set? char char-set)) + (define (string->char-set string) - (scalar-values->char-set (map char->integer (string->list string)))) + (char-set* (map char->integer (string->list string)))) ;; Returns ASCII string: (define (char-set->string char-set) @@ -555,7 +582,7 @@ USA. ;; Returns only ASCII members: (define (char-set-members char-set) - (guarantee-char-set char-set 'CHAR-SET-MEMBERS) + (guarantee char-set? char-set 'CHAR-SET-MEMBERS) (let ((low (%char-set-low char-set))) (let loop ((code 0)) (if (fix:< code #x80) @@ -565,16 +592,6 @@ USA. (loop (fix:+ code 1))) '())))) -(define (char-set . chars) - (for-each (lambda (char) - (guarantee-char char 'CHAR-SET)) - chars) - (%scalar-values->char-set (map char->integer chars))) - -(define (chars->char-set chars) - (guarantee-list-of-type chars char? "character" 'CHARS->CHAR-SET) - (%scalar-values->char-set (map char->integer chars))) - (define (ascii-range->char-set start end) (if (not (index-fixnum? start)) (error:wrong-type-argument start "index fixnum" 'ASCII-RANGE->CHAR-SET)) @@ -584,34 +601,4 @@ USA. (error:bad-range-argument start 'ASCII-RANGE->CHAR-SET)) (if (not (fix:<= end #x100)) (error:bad-range-argument end 'ASCII-RANGE->CHAR-SET)) - (%scalar-values->char-set (list (cons start end)))) - -(define (alphabet->char-set char-set) - char-set) - -(define (char-set->alphabet char-set) - char-set) - -(define (char-in-alphabet? char char-set) - (char-set-member? char-set char)) - -(define (alphabet->scalar-values char-set) - (map (lambda (range) - (if (pair? range) - (cons (car range) - (fix:- (cdr range) 1)) - range)) - (char-set->scalar-values char-set))) - -(define (scalar-values->alphabet ranges) - (guarantee-well-formed-scalar-value-list ranges 'SCALAR-VALUES->ALPHABET) - (%scalar-values->char-set - (map (lambda (range) - (if (pair? range) - (cons (car range) - (if (fix:< (cdr range) char-code-limit) - (fix:+ (cdr range) 1) - (error:bad-range-argument (cdr range) - 'SCALAR-VALUES->ALPHABET))) - range)) - ranges))) \ No newline at end of file + (char-set (cons start end))) \ No newline at end of file diff --git a/src/runtime/parse.scm b/src/runtime/parse.scm index 16078c505..3618cde89 100644 --- a/src/runtime/parse.scm +++ b/src/runtime/parse.scm @@ -306,7 +306,7 @@ USA. value) (define (char-set-converter value) - (guarantee-char-set value) + (guarantee char-set? value) value) (define (keyword-style-converter value) @@ -967,8 +967,8 @@ USA. environment))) (atom-delimiters (get-param:parser-atom-delimiters environment)) (constituents (get-param:parser-constituents environment))) - (guarantee-char-set atom-delimiters #f) - (guarantee-char-set constituents #f) + (guarantee char-set? atom-delimiters #f) + (guarantee char-set? constituents #f) (make-db (get-param:parser-associate-positions? environment) atom-delimiters (overridable-value diff --git a/src/runtime/predicate-metadata.scm b/src/runtime/predicate-metadata.scm index bed365b92..eb2b8dd95 100644 --- a/src/runtime/predicate-metadata.scm +++ b/src/runtime/predicate-metadata.scm @@ -290,6 +290,7 @@ USA. (register-predicate! ascii-char? 'ascii-char '<= 8-bit-char?) (register-predicate! bit-string? 'bit-string) (register-predicate! cell? 'cell) + (register-predicate! code-point-list? 'code-point-list '<= list?) (register-predicate! compiled-code-address? 'compiled-code-address) (register-predicate! compiled-code-block? 'compiled-code-block) (register-predicate! compiled-expression? 'compiled-expression) @@ -310,8 +311,10 @@ USA. (register-predicate! thread-mutex? 'thread-mutex) (register-predicate! undefined-value? 'undefined-value) (register-predicate! unicode-char? 'unicode-char '<= char?) - (register-predicate! unicode-scalar-value? 'unicode-scalar-value + (register-predicate! unicode-code-point? 'unicode-code-point '<= index-fixnum?) + (register-predicate! unicode-scalar-value? 'unicode-scalar-value + '<= unicode-code-point?) (register-predicate! uninterned-symbol? 'uninterned-symbol '<= symbol?) (register-predicate! weak-list? 'weak-list) (register-predicate! weak-pair? 'weak-pair) diff --git a/src/runtime/regsexp.scm b/src/runtime/regsexp.scm index 60c12594c..4187524b2 100644 --- a/src/runtime/regsexp.scm +++ b/src/runtime/regsexp.scm @@ -77,21 +77,6 @@ USA. (else (error "Ill-formed regular s-expression:" regsexp)))) -(define (%compile-char-set items) - (scalar-values->char-set - (append-map (lambda (item) - (cond ((well-formed-scalar-value-range? item) - (list item)) - ((unicode-char? item) - (list (char->integer item))) - ((char-set? item) - (char-set->scalar-values item)) - ((string? item) - (map char->integer (string->list item))) - (else - (error "Ill-formed char-set item:" item)))) - items))) - (define (%compile-group-key key) (if (not (or (fix:fixnum? key) (unicode-char? key) @@ -151,11 +136,11 @@ USA. (define-rule '(CHAR-SET * DATUM) (lambda items - (insn:char-set (%compile-char-set items)))) + (insn:char-set (char-set* items)))) (define-rule '(INVERSE-CHAR-SET * DATUM) (lambda items - (insn:inverse-char-set (%compile-char-set items)))) + (insn:inverse-char-set (char-set* items)))) (define-rule '(LINE-START) (lambda () (insn:line-start))) (define-rule '(LINE-END) (lambda () (insn:line-end))) diff --git a/src/runtime/rgxcmp.scm b/src/runtime/rgxcmp.scm index 4c0667a03..30c69a05e 100644 --- a/src/runtime/rgxcmp.scm +++ b/src/runtime/rgxcmp.scm @@ -265,7 +265,7 @@ USA. (define (re-compile-char-set pattern negate?) (receive (scalar-values negate?*) (re-char-pattern->scalar-values pattern) - (let ((char-set (scalar-values->char-set scalar-values))) + (let ((char-set (char-set* scalar-values))) (if (if negate? (not negate?*) negate?*) (char-set-invert char-set) char-set)))) diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index 9a2e6567b..40ba8e67a 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -1401,31 +1401,13 @@ USA. (parent (runtime)) (export () ;; BEGIN deprecated bindings - (8-bit-alphabet? 8-bit-char-set?) - (alphabet char-set) - (alphabet+ char-set-union) - (alphabet- char-set-difference) - (alphabet->string char-set->string) - (alphabet-predicate char-set-predicate) - (alphabet? char-set?) - (error:not-8-bit-alphabet error:not-8-bit-char-set) - (error:not-alphabet error:not-char-set) - (guarantee-8-bit-alphabet guarantee-8-bit-char-set) - (guarantee-alphabet guarantee-char-set) - (string->alphabet string->char-set) - alphabet->char-set - alphabet->scalar-values - char-in-alphabet? - char-set->alphabet + (char-set->scalar-values char-set->code-points) + (chars->char-set char-set*) + (scalar-values->char-set char-set*) + (well-formed-scalar-value-list? code-point-list?) + char-set-member? error:not-8-bit-char-set - error:not-char-set - error:not-well-formed-scalar-value-list - error:not-well-formed-scalar-value-range guarantee-8-bit-char-set - guarantee-char-set - guarantee-well-formed-scalar-value-list - guarantee-well-formed-scalar-value-range - scalar-values->alphabet ;; END deprecated bindings 8-bit-char-set? ascii-range->char-set @@ -1433,17 +1415,20 @@ USA. char-alphanumeric? char-ctl? char-graphic? + char-in-set? char-lower-case? char-numeric? char-set - char-set->scalar-values + char-set* + char-set->code-points char-set-difference char-set-intersection + char-set-intersection* char-set-invert - char-set-member? char-set-members char-set-predicate char-set-union + char-set-union* char-set:alphabetic char-set:alphanumeric char-set:ascii @@ -1470,12 +1455,10 @@ USA. char-upper-case? char-whitespace? char-wsp? - chars->char-set + code-point-list? + char-set* scalar-value-in-char-set? - scalar-values->char-set - string->char-set - well-formed-scalar-value-list? - well-formed-scalar-value-range?) + string->char-set) (export (runtime string) (char-set-table %char-set-table))) @@ -5651,14 +5634,6 @@ USA. (export () ;; BEGIN deprecated bindings (input-port->parser-buffer textual-input-port->parser-buffer) - (match-parser-buffer-char-in-alphabet match-parser-buffer-char-in-set) - (match-parser-buffer-char-in-alphabet-no-advance - match-parser-buffer-char-in-set-no-advance) - (match-parser-buffer-char-not-in-alphabet - match-parser-buffer-char-not-in-set) - (match-parser-buffer-char-not-in-alphabet-no-advance - match-parser-buffer-char-not-in-set-no-advance) - (match-utf8-char-in-alphabet match-parser-buffer-char-in-set) ;; END deprecated bindings *match-string *match-symbol diff --git a/src/runtime/string.scm b/src/runtime/string.scm index 7766f5a2b..83275c6d4 100644 --- a/src/runtime/string.scm +++ b/src/runtime/string.scm @@ -1359,12 +1359,12 @@ USA. (define (string-find-next-char-in-set string char-set) (guarantee-string string 'STRING-FIND-NEXT-CHAR-IN-SET) - (guarantee-char-set char-set 'STRING-FIND-NEXT-CHAR-IN-SET) + (guarantee char-set? char-set 'STRING-FIND-NEXT-CHAR-IN-SET) (%substring-find-next-char-in-set string 0 (string-length string) char-set)) (define (substring-find-next-char-in-set string start end char-set) (guarantee-substring string start end 'SUBSTRING-FIND-NEXT-CHAR-IN-SET) - (guarantee-char-set char-set 'SUBSTRING-FIND-NEXT-CHAR-IN-SET) + (guarantee char-set? char-set 'SUBSTRING-FIND-NEXT-CHAR-IN-SET) (%substring-find-next-char-in-set string start end char-set)) (define-integrable (%substring-find-next-char-in-set string start end char-set) @@ -1373,13 +1373,13 @@ USA. (define (string-find-previous-char-in-set string char-set) (guarantee-string string 'STRING-FIND-PREVIOUS-CHAR-IN-SET) - (guarantee-char-set char-set 'STRING-FIND-PREVIOUS-CHAR-IN-SET) + (guarantee char-set? char-set 'STRING-FIND-PREVIOUS-CHAR-IN-SET) (%substring-find-previous-char-in-set string 0 (string-length string) char-set)) (define (substring-find-previous-char-in-set string start end char-set) (guarantee-substring string start end 'SUBSTRING-FIND-PREVIOUS-CHAR-IN-SET) - (guarantee-char-set char-set 'SUBSTRING-FIND-PREVIOUS-CHAR-IN-SET) + (guarantee char-set? char-set 'SUBSTRING-FIND-PREVIOUS-CHAR-IN-SET) (%substring-find-previous-char-in-set string start end char-set)) (define (%substring-find-previous-char-in-set string start end char-set) @@ -1678,8 +1678,4 @@ USA. string2 start2 end2 procedure) (guarantee-substring string1 start1 end1 procedure) - (guarantee-substring string2 start2 end2 procedure)) - -(define-integrable (guarantee-char-set object procedure) - (if (not (char-set? object)) - (error:wrong-type-argument object "character set" procedure))) \ No newline at end of file + (guarantee-substring string2 start2 end2 procedure)) \ No newline at end of file diff --git a/src/xml/turtle.scm b/src/xml/turtle.scm index 7f7469fae..8f36c7b91 100644 --- a/src/xml/turtle.scm +++ b/src/xml/turtle.scm @@ -279,7 +279,7 @@ USA. char-set:turtle-digit)) (define char-set:name-start-char - (scalar-values->char-set + (char-set* '((#x0041 . #x005B) #x005F (#x0061 . #x007B) @@ -298,7 +298,7 @@ USA. (define char-set:name-char (char-set-union char-set:name-start-char - (scalar-values->char-set + (char-set* '(#x002D (#x0030 . #x003A) #x00B7 @@ -309,7 +309,7 @@ USA. (char-set-difference char-set:name-start-char (char-set #\_))) (define char-set:character - (scalar-values->char-set '((#x20 . #x5C) (#x5D . #x110000)))) + (char-set* '((#x20 . #x5C) (#x5D . #x110000)))) (define char-set:ucharacter (char-set-difference char-set:character (char-set #\>))) diff --git a/src/xml/xml-chars.scm b/src/xml/xml-chars.scm index e82ac03ca..692698e3c 100644 --- a/src/xml/xml-chars.scm +++ b/src/xml/xml-chars.scm @@ -29,7 +29,7 @@ USA. (declare (usual-integrations)) (define char-set:xml-base-char - (scalar-values->char-set + (char-set* '((#x0041 . #x005B) (#x0061 . #x007B) (#x00C0 . #x00D7) @@ -234,13 +234,13 @@ USA. (#xAC00 . #xD7A4)))) (define char-set:xml-ideographic - (scalar-values->char-set + (char-set* '(#x3007 (#x3021 . #x302A) (#x4E00 . #x9FA6)))) (define char-set:xml-combining-char - (scalar-values->char-set + (char-set* '((#x0300 . #x0346) (#x0360 . #x0362) (#x0483 . #x0487) @@ -338,7 +338,7 @@ USA. #x309A))) (define char-set:xml-digit - (scalar-values->char-set + (char-set* '((#x0030 . #x003A) (#x0660 . #x066A) (#x06F0 . #x06FA) @@ -356,7 +356,7 @@ USA. (#x0F20 . #x0F2A)))) (define char-set:xml-extender - (scalar-values->char-set + (char-set* '(#x00B7 #x02D0 #x02D1 @@ -370,7 +370,7 @@ USA. (#x30FC . #x30FF)))) (define char-set:xml-char - (scalar-values->char-set + (char-set* '(#x0009 #x000A #x000D diff --git a/tests/runtime/test-char-set.scm b/tests/runtime/test-char-set.scm index 82543f4ac..ca6352dcd 100644 --- a/tests/runtime/test-char-set.scm +++ b/tests/runtime/test-char-set.scm @@ -39,14 +39,14 @@ USA. interesting-svls))) (define (svl-round-trip svl) - (char-set->scalar-values (scalar-values->char-set svl))) + (char-set->code-points (char-set* svl))) (define-test 'random-svl-round-trip (lambda () (map (lambda (svl) (run-sub-test (lambda () - (guarantee-well-formed-scalar-value-list svl) + (guarantee code-point-list? svl) (assert-equal-canonical-svls (canonicalize-svl svl) (svl-round-trip svl))))) (append! (append-map! (lambda (i) @@ -84,7 +84,7 @@ USA. (run-sub-test (lambda () (assert-boolean= - (char-set-member? (scalar-values->char-set svl) + (char-set-member? (char-set* svl) (integer->char value)) (named-call 'SVL-MEMBER? svl-member? svl value))) 'EXPRESSION `(CHAR-SET-MEMBER? ,svl ,value))) @@ -117,7 +117,7 @@ USA. interesting-svls))) (define (svl-invert-thru svl) - (char-set->scalar-values (char-set-invert (scalar-values->char-set svl)))) + (char-set->code-points (char-set-invert (char-set* svl)))) (define (svl-invert-direct svl) @@ -144,9 +144,9 @@ USA. (run-sub-test (lambda () (assert-equal - (char-set->scalar-values - (operation (scalar-values->char-set svl1) - (scalar-values->char-set svl2))) + (char-set->code-points + (operation (char-set* svl1) + (char-set* svl2))) (svl-direct (trim-empty-segments svl1) (trim-empty-segments svl2)))) 'EXPRESSION `(,name ,svl1 ,svl2))) diff --git a/tests/runtime/test-regsexp.scm b/tests/runtime/test-regsexp.scm index 938703f5c..30b6e9956 100644 --- a/tests/runtime/test-regsexp.scm +++ b/tests/runtime/test-regsexp.scm @@ -377,14 +377,14 @@ USA. ((seq "a" (inverse-char-set "ab") "c") ("abc" #f) "adc") - ((seq "a" (char-set ,(char-set->alphabet char-set:alphabetic)) "c") + ((seq "a" (char-set ,char-set:alphabetic) "c") "abc" "adc") - ((seq "a" (+ (char-set ,(char-set->alphabet char-set:numeric))) "c") + ((seq "a" (+ (char-set ,char-set:numeric)) "c") "a019c") - ((seq "A" (+ (char-set ,(char-set->alphabet char-set:lower-case))) "C") + ((seq "A" (+ (char-set ,char-set:lower-case)) "C") "AabC") - ((seq "a" (+ (char-set ,(char-set->alphabet char-set:upper-case))) "c") + ((seq "a" (+ (char-set ,char-set:upper-case)) "c") "aBCc") ((seq "a" (** 20 (char-set "ab"))) "aaaaabaaaabaaaabaaaab") -- 2.25.1