(else #t)))
#f)))))
+(define (char-set-predicate char-set)
+ (guarantee-char-set char-set 'CHAR-SET-PREDICATE)
+ (lambda (char)
+ (char-set-member? char-set char)))
+
(define (char-set=? char-set . char-sets)
(guarantee-char-set char-set 'CHAR-SET=?)
(guarantee-char-sets char-sets 'CHAR-SET=?)
(ascii-range->char-set #xC0 #xD7)
(ascii-range->char-set #xD8 #xDE)))
(define-deferred char-set:not-upper-case (char-set-invert char-set:upper-case))
-(define-deferred char-upper-case? (%char-set-test char-set:upper-case))
+(define-deferred char-upper-case? (char-set-predicate char-set:upper-case))
(define-deferred char-set:lower-case
(char-set-union (ascii-range->char-set #x61 #x7B)
(ascii-range->char-set #xE0 #xF7)
(ascii-range->char-set #xF8 #xFF)))
(define-deferred char-set:not-lower-case (char-set-invert char-set:lower-case))
-(define-deferred char-lower-case? (%char-set-test char-set:lower-case))
+(define-deferred char-lower-case? (char-set-predicate char-set:lower-case))
(define-deferred char-set:numeric (ascii-range->char-set #x30 #x3A))
(define-deferred char-set:not-numeric (char-set-invert char-set:numeric))
-(define-deferred char-numeric? (%char-set-test char-set:numeric))
+(define-deferred char-numeric? (char-set-predicate char-set:numeric))
(define-deferred char-set:graphic
(char-set-union (ascii-range->char-set #x20 #x7F)
(ascii-range->char-set #xA0 #x100)))
(define-deferred char-set:not-graphic (char-set-invert char-set:graphic))
-(define-deferred char-graphic? (%char-set-test char-set:graphic))
+(define-deferred char-graphic? (char-set-predicate char-set:graphic))
(define-deferred char-set:whitespace
(char-set #\newline #\tab #\linefeed #\page #\return #\space
(integer->char #xA0)))
(define-deferred char-set:not-whitespace (char-set-invert char-set:whitespace))
-(define-deferred char-whitespace? (%char-set-test char-set:whitespace))
+(define-deferred char-whitespace? (char-set-predicate char-set:whitespace))
(define-deferred char-set:alphabetic
(char-set-union char-set:upper-case char-set:lower-case))
(define-deferred char-set:not-alphabetic (char-set-invert char-set:alphabetic))
-(define-deferred char-alphabetic? (%char-set-test char-set:alphabetic))
+(define-deferred char-alphabetic? (char-set-predicate char-set:alphabetic))
(define-deferred char-set:alphanumeric
(char-set-union char-set:alphabetic char-set:numeric))
(define-deferred char-set:not-alphanumeric
(char-set-invert char-set:alphanumeric))
-(define-deferred char-alphanumeric? (%char-set-test char-set:alphanumeric))
+(define-deferred char-alphanumeric? (char-set-predicate char-set:alphanumeric))
(define-deferred char-set:standard
(char-set-union char-set:graphic (char-set #\newline)))
(define-deferred char-set:not-standard (char-set-invert char-set:standard))
-(define-deferred char-standard? (%char-set-test char-set:standard))
+(define-deferred char-standard? (char-set-predicate char-set:standard))
(define-deferred char-set:newline
(char-set #\newline))
(define-deferred char-set:ctls
(char-set-union (ascii-range->char-set #x00 #x20)
(ascii-range->char-set #x7F #x80)))
-(define-deferred char-ctl? (%char-set-test char-set:ctls))
+(define-deferred char-ctl? (char-set-predicate char-set:ctls))
(define-deferred char-set:wsp (char-set #\space #\tab))
-(define-deferred char-wsp? (%char-set-test char-set:wsp))
-
-(define (%char-set-test char-set)
- (lambda (char)
- (char-set-member? char-set char)))
+(define-deferred char-wsp? (char-set-predicate char-set:wsp))
\f
;;;; Backwards compatibility
(define (string->char-set string)
- (guarantee-string string 'STRING->CHAR-SET)
- (let ((n (string-length string))
- (low (%make-low 0)))
- (do ((i 0 (fix:+ i 1)))
- ((fix:= i n))
- (%low-set! low (vector-8b-ref string i)))
- (%make-char-set low '#() '#())))
+ (scalar-values->char-set (map char->integer (string->list string))))
(define (char-set->string char-set)
- (guarantee-8-bit-char-set char-set 'CHAR-SET->STRING)
- (let loop ((i 0) (chars '()))
- (if (fix:< i %low-length)
- (loop (fix:+ i 1)
- (if (%scalar-value-in-char-set? i char-set)
- (cons (integer->char i) chars)
- chars))
- (apply string (reverse! chars)))))
+ (list->string (map integer->char (char-set-members char-set))))
+
+(define (char-set-members char-set)
+ (guarantee-8-bit-char-set char-set 'CHAR-SET-MEMBERS)
+ (let ((low (char-set-low char-set)))
+ (let loop ((code 0))
+ (if (fix:< code #x100)
+ (if (%low-ref low code)
+ (cons (integer->char code)
+ (loop (fix:+ code 1)))
+ (loop (fix:+ code 1)))
+ '()))))
(define (predicate->char-set predicate)
(%scalar-values->char-set
(predicate (integer->char i)))
(iota #x100))))
-(define (char-set-members char-set)
- (guarantee-8-bit-char-set char-set 'CHAR-SET-MEMBERS)
- (let ((low (char-set-low char-set)))
- (let loop ((code #xFF) (chars '()))
- (if (fix:>= code 0)
- (loop (fix:- code 1)
- (if (%low-ref low code)
- (cons (integer->char code) chars)
- chars))
- chars))))
-
(define (char-set . chars)
(for-each (lambda (char)
(guarantee-unicode-char char 'CHAR-SET))
(error:bad-range-argument start 'ASCII-RANGE->CHAR-SET))
(if (not (fix:<= end #x100))
(error:bad-range-argument end 'ASCII-RANGE->CHAR-SET))
- (%scalar-values->char-set (list (cons start (fix:- end 1)))))
\ No newline at end of file
+ (%scalar-values->char-set (list (cons start (fix:- end 1)))))
+
+(define (char-in-alphabet? char alphabet)
+ (char-set-member? alphabet char))
+
+(define (scalar-values->alphabet items)
+ (scalar-values->char-set
+ (map (lambda (range)
+ (if (and (pair? range)
+ (unicode-scalar-value? (car range))
+ (unicode-scalar-value? (cdr range)))
+ (cons (car range) (fix:+ (cdr range) 1))
+ range))
+ items)))
+
+(define (alphabet->scalar-values alphabet)
+ (map (lambda (range)
+ (if (pair? range)
+ (set-cdr! range (fix:- (cdr range) 1))))
+ (char-set->scalar-values alphabet)))
+
+(define (char-set->alphabet char-set)
+ char-set)
\ No newline at end of file
|#
\f
-;;;; Alphabets
-
-(define-structure (alphabet (type-descriptor <alphabet>))
- (low #f read-only #t)
- (high1 #f read-only #t)
- (high2 #f read-only #t))
-
-(define-guarantee alphabet "a Unicode alphabet")
-
-(define-integrable (make-alphabet-low)
- (make-string #x100 (integer->char 0)))
-
-(define-integrable (alphabet-low-ref low scalar-value)
- (not (fix:= (fix:and (vector-8b-ref low (fix:lsh scalar-value -3))
- (fix:lsh 1 (fix:and scalar-value 7)))
- 0)))
-
-(define-integrable (alphabet-low-set! low scalar-value)
- (vector-8b-set! low
- (fix:lsh scalar-value -3)
- (fix:or (vector-8b-ref low (fix:lsh scalar-value -3))
- (fix:lsh 1 (fix:and scalar-value 7)))))
-
-(define null-alphabet
- (make-alphabet (make-alphabet-low) '#() '#()))
-
-(define (char-in-alphabet? char alphabet)
- (guarantee-unicode-char char 'CHAR-IN-ALPHABET?)
- (guarantee-alphabet alphabet 'CHAR-IN-ALPHABET?)
- (%scalar-value-in-alphabet? (char-code char) alphabet))
-
-(define (%scalar-value-in-alphabet? pt alphabet)
- (if (fix:< pt #x800)
- (alphabet-low-ref (alphabet-low alphabet) pt)
- (let ((high1 (alphabet-high1 alphabet))
- (high2 (alphabet-high2 alphabet)))
- (let loop ((lower 0) (upper (vector-length high1)))
- (and (fix:< lower upper)
- (let ((index (fix:quotient (fix:+ lower upper) 2)))
- (cond ((fix:< pt (vector-ref high1 index))
- (loop lower index))
- ((fix:< (vector-ref high2 index) pt)
- (loop (fix:+ index 1) upper))
- (else #t))))))))
-
-(define (well-formed-scalar-value-list? items)
- (list-of-type? items well-formed-scalar-value-range?))
-
-(define (well-formed-scalar-value-range? item)
- (if (pair? item)
- (and (unicode-scalar-value? (car item))
- (unicode-scalar-value? (cdr item))
- (fix:<= (car item) (cdr item)))
- (unicode-scalar-value? item)))
-
-(define-guarantee well-formed-scalar-value-list "a Unicode scalar-value list")
-(define-guarantee well-formed-scalar-value-range "a Unicode scalar-value range")
-\f
-(define (scalar-values->alphabet items)
- (guarantee-well-formed-scalar-value-list items 'SCALAR-VALUES->ALPHABET)
- (%scalar-values->alphabet items))
-
-(define (alphabet . chars)
- (for-each (lambda (char)
- (guarantee-unicode-char char 'ALPHABET))
- chars)
- (%scalar-values->alphabet (map char->integer chars)))
-
-(define (%scalar-values->alphabet items)
- (receive (low-items high-items)
- (split-list (canonicalize-scalar-value-list items) #x800)
- (let ((low (make-alphabet-low)))
- (for-each (lambda (item)
- (if (pair? item)
- (do ((i (car item) (fix:+ i 1)))
- ((fix:> i (cdr item)))
- (alphabet-low-set! low i))
- (alphabet-low-set! low item)))
- low-items)
- (let ((n-high (length high-items)))
- (let ((high1 (make-vector n-high))
- (high2 (make-vector n-high)))
- (do ((items high-items (cdr items))
- (i 0 (fix:+ i 1)))
- ((not (pair? items)))
- (if (pair? (car items))
- (begin
- (vector-set! high1 i (caar items))
- (vector-set! high2 i (cdar items)))
- (begin
- (vector-set! high1 i (car items))
- (vector-set! high2 i (car items)))))
- (make-alphabet low high1 high2))))))
-
-(define (alphabet->scalar-values alphabet)
- (guarantee-alphabet alphabet 'ALPHABET->SCALAR-VALUES)
- (append! (alphabet-low->scalar-values (alphabet-low alphabet))
- (alphabet-high->scalar-values (alphabet-high1 alphabet)
- (alphabet-high2 alphabet))))
-
-(define (alphabet-low->scalar-values low)
- (let find-lower ((i 0) (result '()))
- (if (fix:< i #x800)
- (if (alphabet-low-ref low i)
- (let ((lower i))
- (let find-upper ((i (fix:+ i 1)))
- (if (fix:< i #x800)
- (if (alphabet-low-ref low i)
- (find-upper (fix:+ i 1))
- (find-lower i
- (cons (if (fix:= lower (fix:- i 1))
- lower
- (cons lower (fix:- i 1)))
- result)))
- (reverse!
- (cons (if (fix:= lower (fix:- i 1))
- lower
- (cons lower (fix:- i 1)))
- result)))))
- (find-lower (fix:+ i 1) result))
- (reverse! result))))
-
-(define (alphabet-high->scalar-values lower upper)
- (let ((n (vector-length lower)))
- (let loop ((i 0) (result '()))
- (if (fix:< i n)
- (loop (fix:+ i 1)
- (cons (if (fix:< (vector-ref lower i) (vector-ref upper i))
- (cons (vector-ref lower i) (vector-ref upper i))
- (vector-ref lower i))
- result))
- (reverse! result)))))
-\f
-(define (canonicalize-scalar-value-list items)
- (if (pair? items)
- (let ((items
- (sort items
- (lambda (a b)
- (let ((al (if (pair? a) (car a) a))
- (ah (if (pair? a) (cdr a) a))
- (bl (if (pair? b) (car b) b))
- (bh (if (pair? b) (cdr b) b)))
- (or (fix:< al bl)
- (and (fix:= al bl)
- (fix:< ah bh)))))))
- (make-item
- (lambda (l h)
- (if (fix:= l h)
- l
- (cons l h)))))
- (let loop
- ((al (if (pair? (car items)) (caar items) (car items)))
- (ah (if (pair? (car items)) (cdar items) (car items)))
- (items (cdr items)))
- (if (pair? items)
- (let ((bl (if (pair? (car items)) (caar items) (car items)))
- (bh (if (pair? (car items)) (cdar items) (car items)))
- (items (cdr items)))
- (if (fix:< (fix:+ ah 1) bl)
- (cons (make-item al ah)
- (loop bl bh items))
- (loop al (fix:max ah bh) items)))
- (list (make-item al ah)))))
- items))
-
-(define (split-list items limit)
- (let loop ((items items) (low '()))
- (if (pair? items)
- (let ((item (car items)))
- (cond ((not (pair? item))
- (if (fix:< item limit)
- (loop (cdr items) (cons item low))
- (values low items)))
- ((fix:< (cdr item) limit)
- (loop (cdr items) (cons item low)))
- ((fix:<= limit (car item))
- (values low items))
- (else
- (values (cons (cons (car item) (fix:- limit 1)) low)
- (cons (cons limit (cdr item)) items)))))
- (values low '()))))
-\f
-#|
-
-(define (test-canonicalize-scalar-value-list n-items n-iter)
- (run-cpl-test n-items n-iter canonicalize-scalar-value-list))
-
-(define (test-alphabet->scalar-values n-items n-iter)
- (run-cpl-test n-items n-iter
- (lambda (cpl)
- (alphabet->scalar-values (scalar-values->alphabet cpl)))))
-
-(define (run-cpl-test n-items n-iter procedure)
- (do ((i 0 (+ i 1))
- (failures '()
- (let ((cpl (make-test-cpl n-items)))
- (guarantee-well-formed-scalar-value-list cpl)
- (let ((cpl* (procedure cpl)))
- (if (canonical-scalar-value-list? cpl*)
- failures
- (cons (cons cpl cpl*) failures))))))
- ((not (< i n-iter))
- (let ((n-failures (length failures)))
- (if (> n-failures 0)
- (begin
- (write-string "Got ")
- (write n-failures)
- (write-string " failure")
- (if (> n-failures 1)
- (write-string "s"))
- (write-string " out of ")
- (write n-iter)
- (newline)
- (pp failures)))))))
-
-(define (make-test-cpl n-items)
- (make-initialized-list n-items
- (lambda (i)
- (let loop ()
- (let ((n (random #x10000)))
- (if (unicode-scalar-value? n)
- (let ((m (random #x100)))
- (if (fix:= m 0)
- n
- (if (unicode-scalar-value? (fix:+ n m))
- (fix:+ n m)
- (loop))))
- (loop)))))))
-
-(define (canonical-scalar-value-list? items)
- (and (well-formed-scalar-value-list? items)
- (if (pair? items)
- (let loop ((a (car items)) (items (cdr items)))
- (if (pair? items)
- (let ((b (car items))
- (items (cdr items)))
- (and (fix:< (fix:+ (if (pair? a) (cdr a) a) 1)
- (if (pair? b) (car b) b))
- (loop b items)))
- #t))
- #t)))
-|#
-\f
-(define (8-bit-alphabet? alphabet)
- (and (fix:= (vector-length (alphabet-high1 alphabet)) 0)
- (let ((low (alphabet-low alphabet)))
- (let loop ((i #x20))
- (or (fix:= i #x100)
- (and (fix:= (vector-8b-ref low i) 0)
- (loop (fix:+ i 1))))))))
-
-(define-guarantee 8-bit-alphabet "an 8-bit alphabet")
-
-(define (char-set->alphabet char-set)
- (guarantee-char-set char-set 'CHAR-SET->ALPHABET)
- (let ((low (make-alphabet-low)))
- (do ((i 0 (fix:+ i 1)))
- ((fix:= i #x100))
- (if (char-set-member? char-set (integer->char i))
- (alphabet-low-set! low i)))
- (make-alphabet low '#() '#())))
-
-(define (alphabet->char-set alphabet)
- (guarantee-8-bit-alphabet alphabet 'ALPHABET->CHAR-SET)
- (predicate->char-set (lambda (char) (char-in-alphabet? char alphabet))))
-
-(define (string->alphabet string)
- (guarantee-string string 'STRING->ALPHABET)
- (let ((n (string-length string))
- (low (make-alphabet-low)))
- (do ((i 0 (fix:+ i 1)))
- ((fix:= i n))
- (alphabet-low-set! low (vector-8b-ref string i)))
- (make-alphabet low '#() '#())))
-
-(define (alphabet->string alphabet)
- (guarantee-8-bit-alphabet alphabet 'ALPHABET->STRING)
- (let loop ((i 0) (chars '()))
- (if (fix:< i #x100)
- (loop (fix:+ i 1)
- (if (%scalar-value-in-alphabet? i alphabet)
- (cons (integer->char i) chars)
- chars))
- (apply string (reverse! chars)))))
-\f
-(define (alphabet+ . alphabets)
- (for-each (lambda (alphabet)
- (guarantee-alphabet alphabet 'ALPHABET+))
- alphabets)
- (reduce alphabet+2 null-alphabet alphabets))
-
-(define (alphabet+2 a1 a2)
- (receive (high1 high2)
- (alphabet-high+2 (alphabet-high1 a1)
- (alphabet-high2 a1)
- (alphabet-high1 a2)
- (alphabet-high2 a2))
- (make-alphabet (alphabet-low+2 (alphabet-low a1) (alphabet-low a2))
- high1
- high2)))
-
-(define (alphabet-low+2 low1 low2)
- (let ((low (make-alphabet-low)))
- (do ((i 0 (fix:+ i 1)))
- ((fix:= i #x100))
- (vector-8b-set! low i
- (fix:or (vector-8b-ref low1 i)
- (vector-8b-ref low2 i))))
- low))
-
-(define (alphabet-high+2 lower1 upper1 lower2 upper2)
- (let ((n1 (vector-length lower1))
- (n2 (vector-length lower2)))
- (let ((lower (make-vector (fix:+ n1 n2)))
- (upper (make-vector (fix:+ n1 n2))))
- (let ((n
- (let loop ((i1 0) (i2 0) (i 0))
- (cond ((fix:= i1 n1)
- (subvector-move-left! lower2 i2 n2 lower i)
- (subvector-move-left! upper2 i2 n2 upper i)
- (fix:+ i (fix:- n2 i2)))
- ((fix:= i2 n2)
- (subvector-move-left! lower1 i1 n1 lower i)
- (subvector-move-left! upper1 i1 n1 upper i)
- (fix:+ i (fix:- n1 i1)))
- ((fix:< (vector-ref upper1 i1) (vector-ref lower2 i2))
- (vector-set! lower i (vector-ref lower1 i1))
- (vector-set! upper i (vector-ref upper1 i1))
- (loop (fix:+ i1 1) i2 (fix:+ i 1)))
- ((fix:< (vector-ref upper2 i2) (vector-ref lower1 i1))
- (vector-set! lower i (vector-ref lower2 i2))
- (vector-set! upper i (vector-ref upper2 i2))
- (loop i1 (fix:+ i2 1) (fix:+ i 1)))
- (else
- (vector-set! lower i
- (min (vector-ref lower1 i1)
- (vector-ref lower2 i2)))
- (vector-set! upper i
- (max (vector-ref upper1 i1)
- (vector-ref upper2 i2)))
- (loop (fix:+ i1 1) (fix:+ i2 1) (fix:+ i 1)))))))
- (if (fix:< n (vector-length lower))
- (values (vector-head lower n) (vector-head upper n))
- (values lower upper))))))
-\f
-(define (alphabet- a1 a2)
- (receive (high1 high2)
- (alphabet-high- (alphabet-high1 a1)
- (alphabet-high2 a1)
- (alphabet-high1 a2)
- (alphabet-high2 a2))
- (make-alphabet (alphabet-low- (alphabet-low a1) (alphabet-low a2))
- high1
- high2)))
-
-(define (alphabet-low- low1 low2)
- (let ((low (make-alphabet-low)))
- (do ((i 0 (fix:+ i 1)))
- ((fix:= i #x100))
- (vector-8b-set! low i
- (fix:and (vector-8b-ref low1 i)
- (fix:not (vector-8b-ref low2 i)))))
- low))
-
-(define (alphabet-high- lower1 upper1 lower2 upper2)
- (let ((n1 (vector-length lower1))
- (n2 (vector-length lower2)))
- (let ((lower (make-vector (fix:* n1 2)))
- (upper (make-vector (fix:* n1 2))))
- (let ((n
- (let loop ((i1 0) (i2 0) (i 0))
- (cond ((fix:= i1 n1)
- i)
- ((fix:= i2 n2)
- (subvector-move-left! lower1 i1 n1 lower i)
- (subvector-move-left! upper1 i1 n1 upper i)
- (fix:+ i (fix:- n1 i1)))
- ((fix:< (vector-ref upper1 i1) (vector-ref lower2 i2))
- (vector-set! lower i (vector-ref lower1 i1))
- (vector-set! upper i (vector-ref upper1 i1))
- (loop (fix:+ i1 1) i2 (fix:+ i 1)))
- ((fix:< (vector-ref upper2 i2) (vector-ref lower1 i1))
- (loop i1 (fix:+ i2 1) i))
- ((fix:< (vector-ref lower1 i1) (vector-ref lower2 i2))
- (vector-set! lower i (vector-ref lower1 i1))
- (vector-set! upper i (- (vector-ref lower2 i2) 1))
- (if (fix:<= (vector-ref upper1 i1)
- (vector-ref upper2 i2))
- (loop (fix:+ i1 1) (fix:+ i2 1) (fix:+ i 1))
- (begin
- (vector-set! lower (fix:+ i 1)
- (+ (vector-ref upper2 i2) 1))
- (vector-set! upper (fix:+ i 1)
- (vector-ref upper1 i1))
- (loop (fix:+ i1 1) (fix:+ i2 1) (fix:+ i 2)))))
- ((fix:<= (vector-ref upper1 i1) (vector-ref upper2 i2))
- (loop (fix:+ i1 1) (fix:+ i2 1) i))
- (else
- (vector-set! lower i (+ (vector-ref upper2 i2) 1))
- (vector-set! upper i (vector-ref upper1 i1))
- (loop (fix:+ i1 1) (fix:+ i2 1) (fix:+ i 1)))))))
- (if (fix:< n (vector-length lower))
- (values (vector-head lower n) (vector-head upper n))
- (values lower upper))))))
-\f
;;;; Unicode strings
(define-structure (wide-string (type-descriptor <wide-string>)
(error:bad-range-argument coding caller))
(else
(error:wrong-type-argument string "string" caller)))
- string start end))
-
-(define (alphabet-predicate alphabet)
- (cond ((alphabet? alphabet)
- (lambda (char) (char-in-alphabet? char alphabet)))
- ((char-set? alphabet)
- (lambda (char) (char-set-member? alphabet char)))
- (else
- (error:not-alphabet alphabet 'ALPHABET-PREDICATE))))
\ No newline at end of file
+ string start end))
\ No newline at end of file