From 107937ee5a04e70f736ece3a811ed080f71f2241 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Mon, 31 May 2010 22:31:29 -0700 Subject: [PATCH] Put back alphabet compatibility, in a different form. --- doc/ref-manual/characters.texi | 2 +- src/runtime/chrset.scm | 58 +++++++++++++++++++++++++--------- src/runtime/runtime.pkg | 29 +++++++++++++++-- 3 files changed, 70 insertions(+), 19 deletions(-) diff --git a/doc/ref-manual/characters.texi b/doc/ref-manual/characters.texi index 403e57aab..1cc9d7c4a 100644 --- a/doc/ref-manual/characters.texi +++ b/doc/ref-manual/characters.texi @@ -641,7 +641,7 @@ proper list, each element of which is either a Unicode scalar value or a pair of Unicode scalar values. A pair of Unicode scalar values represents a contiguous range of Unicode scalar values. The @sc{car} of the pair is the inclusive lower limit, and the @sc{cdr} is the -exclusive upper limit. The lower limit must be strictly less than to +exclusive upper limit. The lower limit must be less than or equal to the upper limit. @end deffn diff --git a/src/runtime/chrset.scm b/src/runtime/chrset.scm index 4fb4390a7..4df2a3c69 100644 --- a/src/runtime/chrset.scm +++ b/src/runtime/chrset.scm @@ -85,6 +85,17 @@ USA. (define %null-char-set (%make-char-set (%make-low 0) '#())) + +(define (8-bit-char-set? char-set) + (and (char-set? char-set) + (fix:= (vector-length (%char-set-high char-set)) 0) + (let ((low (%char-set-low char-set))) + (let loop ((i #x20)) + (or (fix:= i %low-length) + (and (fix:= (vector-8b-ref low i) 0) + (loop (fix:+ i 1)))))))) + +(define-guarantee 8-bit-char-set "an 8-bit char-set") ;;;; Conversion to and from scalar-values list @@ -95,7 +106,7 @@ USA. (if (pair? range) (and (index-fixnum? (car range)) (index-fixnum? (cdr range)) - (fix:< (car range) (cdr range)) + (fix:<= (car range) (cdr range)) (fix:<= (cdr range) char-code-limit)) (and (index-fixnum? range) (fix:< range char-code-limit)))) @@ -303,19 +314,6 @@ USA. (loop (fix:+ i 1))) #t))))) -;;;; 8-bit character sets - -(define (8-bit-char-set? char-set) - (and (char-set? char-set) - (fix:= (vector-length (%char-set-high char-set)) 0) - (let ((low (%char-set-low char-set))) - (let loop ((i #x20)) - (or (fix:= i %low-length) - (and (fix:= (vector-8b-ref low i) 0) - (loop (fix:+ i 1)))))))) - -(define-guarantee 8-bit-char-set "an 8-bit char-set") - ;;;; Mapping operations (define (char-set-invert char-set) @@ -578,4 +576,34 @@ USA. (error:bad-range-argument start 'ASCII-RANGE->CHAR-SET)) (if (not (fix:<= end #x100)) (error:bad-range-argument end 'ASCII-RANGE->CHAR-SET)) - (%scalar-values->char-set (list (cons start (fix:- end 1))))) \ No newline at end of file + (%scalar-values->char-set (list (cons start (fix:- end 1))))) + +(define (alphabet->char-set char-set) + char-set) + +(define (char-set->alphabet char-set) + char-set) + +(define (char-in-alphabet? char char-set) + (char-set-member? char-set char)) + +(define (alphabet->scalar-values char-set) + (map (lambda (range) + (if (pair? range) + (cons (car range) + (fix:- (cdr range) 1)) + range)) + (char-set->scalar-values char-set))) + +(define (scalar-values->alphabet ranges) + (guarantee-well-formed-scalar-value-list ranges 'SCALAR-VALUES->ALPHABET) + (%scalar-values->char-set + (map (lambda (range) + (if (pair? range) + (cons (car range) + (if (fix:< (cdr range) char-code-limit) + (fix:+ (cdr range) 1) + (error:bad-range-argument (cdr range) + 'SCALAR-VALUES->ALPHABET))) + range)) + ranges))) \ No newline at end of file diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index c7c8918ef..df0545346 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -1084,16 +1084,33 @@ USA. (files "chrset") (parent (runtime)) (export () + (8-bit-alphabet? 8-bit-char-set?) + ( ) + (alphabet char-set) + (alphabet+ char-set-union) + (alphabet- char-set-difference) + (alphabet->string char-set->string) + (alphabet-predicate char-set-predicate) + (alphabet? char-set?) + (error:not-8-bit-alphabet error:not-8-bit-char-set) + (error:not-alphabet error:not-char-set) + (guarantee-8-bit-alphabet guarantee-8-bit-char-set) + (guarantee-alphabet guarantee-char-set) + (string->alphabet string->char-set) 8-bit-char-set? + alphabet->char-set + alphabet->scalar-values ascii-range->char-set char-alphabetic? char-alphanumeric? char-ctl? char-graphic? + char-in-alphabet? char-lower-case? char-numeric? char-set + char-set->alphabet char-set->scalar-values char-set-difference char-set-intersection @@ -1138,6 +1155,7 @@ USA. guarantee-well-formed-scalar-value-list guarantee-well-formed-scalar-value-range scalar-value-in-char-set? + scalar-values->alphabet scalar-values->char-set string->char-set well-formed-scalar-value-list? @@ -5170,6 +5188,14 @@ USA. (files "parser-buffer") (parent (runtime)) (export () + (match-parser-buffer-char-in-alphabet match-parser-buffer-char-in-set) + (match-parser-buffer-char-in-alphabet-no-advance + match-parser-buffer-char-in-set-no-advance) + (match-parser-buffer-char-not-in-alphabet + match-parser-buffer-char-not-in-set) + (match-parser-buffer-char-not-in-alphabet-no-advance + match-parser-buffer-char-not-in-set-no-advance) + (match-utf8-char-in-alphabet match-parser-buffer-char-in-set) *match-string *match-symbol *match-utf8-string @@ -5186,12 +5212,9 @@ USA. match-parser-buffer-char match-parser-buffer-char-ci match-parser-buffer-char-ci-no-advance - - match-parser-buffer-char-in-set match-parser-buffer-char-in-set-no-advance match-parser-buffer-char-no-advance - match-parser-buffer-char-not-in-set match-parser-buffer-char-not-in-set-no-advance match-parser-buffer-not-char -- 2.25.1