(%high-set! high i (car ilist)))
high)))
\f
-(define (char-set->ilist char-set)
- (reverse!
- (%high->ilist (%char-set-high char-set)
- (%low->ilist (%char-set-low char-set)))))
-
-(define (%low->ilist low)
- (let ((low-limit (%low-limit low)))
-
- (define (find-start i result)
- (if (fix:< i low-limit)
- (if (%low-ref low i)
- (find-end i result)
- (find-start (fix:+ i 1) result))
- result))
-
- (define (find-end start result)
- (let loop ((i (fix:+ start 1)))
- (if (fix:< i low-limit)
- (if (%low-ref low i)
- (loop (fix:+ i 1))
- (find-start i (reverse-ilist-cons start i result)))
- (reverse-ilist-cons start low-limit result))))
-
- (find-start 0 '())))
-
-(define (%high->ilist high result)
- (let ((n (%high-limit high)))
+(define (ilist-invert ilist)
- (define (loop i result)
- (if (fix:< i n)
- (loop (fix:+ i 1)
- (cons (%high-ref high i) result))
- result))
+ (define (loop start ilist inverse)
+ (if (pair? ilist)
+ (loop (cadr ilist)
+ (cddr ilist)
+ (reverse-ilist-cons start (car ilist) inverse))
+ (reverse!
+ (if (fix:< start #x110000)
+ (reverse-ilist-cons start #x110000 inverse)
+ inverse))))
- (if (and (fix:> n 0)
- (pair? result)
- (fix:= (%high-ref high 0) (car result)))
- (loop 1 (cdr result))
- (loop 0 result))))
+ (if (or (not (pair? ilist))
+ (fix:< 0 (car ilist)))
+ (loop 0 ilist '())
+ (loop (cadr ilist) (cddr ilist) '())))
(define-integrable (ilist-cons start end ilist)
(cons start (cons end ilist)))
(define-integrable (reverse-ilist-cons start end ilist)
(cons end (cons start ilist)))
-
-(define (char-set-size char-set)
- (fix:+ (%low-size (%char-set-low char-set))
- (%high-size (%char-set-high char-set))))
-
-(define (%low-size low)
- (let ((low-limit (%low-limit low)))
-
- (define (find-start i size)
- (if (fix:< i low-limit)
- (if (%low-ref low i)
- (let ((end (find-end (fix:+ i 1))))
- (find-start end (fix:+ size (fix:- end i))))
- (find-start (fix:+ i 1) size))
- size))
-
- (define (find-end i)
- (if (fix:< i low-limit)
- (if (%low-ref low i)
- (find-end (fix:+ i 1))
- i)
- low-limit))
-
- (find-start 0 0)))
-
-(define (%high-size high)
- (let ((end (%high-limit high)))
- (do ((index 0 (fix:+ index 2))
- (size 0
- (fix:+ size
- (fix:- (%high-ref high (fix:+ index 1))
- (%high-ref high index)))))
- ((not (fix:< index end)) size))))
\f
(define (ilist-combiner combine)
(lambda (il1 il2)
(loop 0 0 il1 il2 '())))
+
+(define ilist-union
+ (ilist-combiner (lambda (a b) (or a b))))
+
+(define ilist-intersection
+ (ilist-combiner (lambda (a b) (and a b))))
+
+(define ilist-difference
+ (ilist-combiner (lambda (a b) (and a (not b)))))
\f
-;;;; Constructors
+;;;; Ranges
-(define (char-set . chars)
- (char-set* chars))
+(define (range? object)
+ (or (and (pair? object)
+ (index-fixnum? (car object))
+ (index-fixnum? (cdr object))
+ (fix:<= (cdr object) #x110000)
+ (fix:<= (car object) (cdr object)))
+ (unicode-code-point? object)))
-(define (char-set* cpl)
- (guarantee-list-of cpl-element? cpl 'char-set*)
- (char-set-union* (%cpl->char-sets cpl)))
+(define (make-range start end)
+ (if (fix:= (fix:- end start) 1)
+ start
+ (cons start end)))
-(define (%cpl->char-sets cpl)
- (let loop ((cpl cpl) (ranges '()) (char-sets '()))
- (cond ((not (pair? cpl))
- (cons (ranges->char-set (normalize-ranges ranges))
- char-sets))
- ((%cpl-element->ranges (car cpl))
- => (lambda (ranges*)
- (loop (cdr cpl)
- (append ranges* ranges)
- char-sets)))
- ((char-set? (car cpl))
- (loop (cdr cpl)
- ranges
- (cons (car cpl) char-sets)))
- ((name->char-set (car cpl))
- => (lambda (char-set)
- (loop (cdr cpl)
- ranges
- (cons char-set char-sets))))
- (else
- (error:not-a cpl-element? (car cpl))))))
+(define (range-start range)
+ (if (pair? range)
+ (car range)
+ range))
-(define (%cpl-element->ranges elt)
- (cond ((range? elt) (list elt))
- ((char? elt) (list (char-code elt)))
- ((string? elt) (map char->integer (string->list elt)))
- (else #f)))
+(define (range-end range)
+ (if (pair? range)
+ (cdr range)
+ (fix:+ range 1)))
+
+(define (range<? range1 range2)
+ (or (fix:< (range-start range1)
+ (range-start range2))
+ (and (fix:= (range-start range1)
+ (range-start range2))
+ (fix:< (range-end range1)
+ (range-end range2)))))
+
+(define (ranges->char-set ranges)
+ (let loop ((ranges ranges) (ilist '()))
+ (if (pair? ranges)
+ (loop (cdr ranges)
+ (reverse-ilist-cons (range-start (car ranges))
+ (range-end (car ranges))
+ ilist))
+ (ilist->char-set (reverse! ilist)))))
(define (normalize-ranges ranges)
(let ((ranges
(set-cdr! ranges (cddr ranges))
(loop ranges)))))))
ranges))
-
-(define (string->char-set string)
- (char-set* (map char->integer (string->list string))))
-
-(define (compute-char-set procedure)
-
- (define (find-start cp end ilist)
- (if (fix:< cp end)
- (if (procedure cp)
- (find-end (fix:+ cp 1) end cp ilist)
- (find-start (fix:+ cp 1) end ilist))
- ilist))
-
- (define (find-end cp end start ilist)
- (if (fix:< cp end)
- (if (procedure cp)
- (find-end (fix:+ cp 1) end start ilist)
- (find-start (fix:+ cp 1) end (ilist-cons cp start ilist)))
- (ilist-cons end start ilist)))
-
- (ilist->char-set
- (reverse! (find-start #xE000 #x110000 (find-start 0 #xD800 '())))))
\f
;;;; Code-point lists
((whitespace white space) char-set:whitespace)
(else #f)))
-(define (range? object)
- (or (and (pair? object)
- (index-fixnum? (car object))
- (index-fixnum? (cdr object))
- (fix:<= (cdr object) #x110000)
- (fix:<= (car object) (cdr object)))
- (unicode-code-point? object)))
+(define (%cpl->char-sets cpl)
+ (let loop ((cpl cpl) (ranges '()) (char-sets '()))
+ (cond ((not (pair? cpl))
+ (cons (ranges->char-set (normalize-ranges ranges))
+ char-sets))
+ ((%cpl-element->ranges (car cpl))
+ => (lambda (ranges*)
+ (loop (cdr cpl)
+ (append ranges* ranges)
+ char-sets)))
+ ((char-set? (car cpl))
+ (loop (cdr cpl)
+ ranges
+ (cons (car cpl) char-sets)))
+ ((name->char-set (car cpl))
+ => (lambda (char-set)
+ (loop (cdr cpl)
+ ranges
+ (cons char-set char-sets))))
+ (else
+ (error:not-a cpl-element? (car cpl))))))
-(define (make-range start end)
- (if (fix:= (fix:- end start) 1)
- start
- (cons start end)))
+(define (%cpl-element->ranges elt)
+ (cond ((range? elt) (list elt))
+ ((char? elt) (list (char-code elt)))
+ ((string? elt) (map char->integer (string->list elt)))
+ (else #f)))
+\f
+;;;; Predicates
-(define (range-start range)
- (if (pair? range)
- (car range)
- range))
+(define (char-set= char-set . char-sets)
+ (every (lambda (char-set*)
+ (and (bytevector=? (%char-set-low char-set*)
+ (%char-set-low char-set))
+ (bytevector=? (%char-set-high char-set*)
+ (%char-set-high char-set))))
+ char-sets))
-(define (range-end range)
- (if (pair? range)
- (cdr range)
- (fix:+ range 1)))
+(define (char-set-hash char-set #!optional modulus)
+ (let ((hash
+ (primitive-object-hash-2 (%char-set-low char-set)
+ (%char-set-high char-set))))
+ (if (default-object? modulus)
+ hash
+ (begin
+ (guarantee positive-fixnum? modulus 'char-set-hash)
+ (fix:remainder hash modulus)))))
-(define (range<? range1 range2)
- (or (fix:< (range-start range1)
- (range-start range2))
- (and (fix:= (range-start range1)
- (range-start range2))
- (fix:< (range-end range1)
- (range-end range2)))))
+(define (char-set-empty? cs)
+ (and (fix:= 0 (bytevector-length (%char-set-low cs)))
+ (fix:= 0 (bytevector-length (%char-set-high cs)))))
-(define (ranges->char-set ranges)
- (let loop ((ranges ranges) (ilist '()))
- (if (pair? ranges)
- (loop (cdr ranges)
- (reverse-ilist-cons (range-start (car ranges))
- (range-end (car ranges))
- ilist))
- (ilist->char-set (reverse! ilist)))))
+(define (char-sets-disjoint? char-set . char-sets)
+ (every (lambda (char-set*)
+ (char-set-empty? (char-set-intersection char-set char-set*)))
+ char-sets))
\f
-;;;; Accessors
+;;;; Constructors
+
+(define (char-set . cpl)
+ (char-set* cpl))
+
+(define (char-set* cpl)
+ (guarantee-list-of cpl-element? cpl 'char-set*)
+ (char-set-union* (%cpl->char-sets cpl)))
+
+(define (string->char-set string)
+ (char-set* (map char->integer (string->list string))))
+
+(define (compute-char-set procedure)
+
+ (define (find-start cp end ilist)
+ (if (fix:< cp end)
+ (if (procedure cp)
+ (find-end (fix:+ cp 1) end cp ilist)
+ (find-start (fix:+ cp 1) end ilist))
+ ilist))
+
+ (define (find-end cp end start ilist)
+ (if (fix:< cp end)
+ (if (procedure cp)
+ (find-end (fix:+ cp 1) end start ilist)
+ (find-start (fix:+ cp 1) end (ilist-cons cp start ilist)))
+ (ilist-cons end start ilist)))
+
+ (ilist->char-set
+ (reverse! (find-start #xE000 #x110000 (find-start 0 #xD800 '())))))
+\f
+;;;; Queries
+
+(define (char-set->ilist char-set)
+ (reverse!
+ (%high->ilist (%char-set-high char-set)
+ (%low->ilist (%char-set-low char-set)))))
+
+(define (%low->ilist low)
+ (let ((low-limit (%low-limit low)))
+
+ (define (find-start i result)
+ (if (fix:< i low-limit)
+ (if (%low-ref low i)
+ (find-end i result)
+ (find-start (fix:+ i 1) result))
+ result))
+
+ (define (find-end start result)
+ (let loop ((i (fix:+ start 1)))
+ (if (fix:< i low-limit)
+ (if (%low-ref low i)
+ (loop (fix:+ i 1))
+ (find-start i (reverse-ilist-cons start i result)))
+ (reverse-ilist-cons start low-limit result))))
+
+ (find-start 0 '())))
+
+(define (%high->ilist high result)
+ (let ((n (%high-limit high)))
+
+ (define (loop i result)
+ (if (fix:< i n)
+ (loop (fix:+ i 1)
+ (cons (%high-ref high i) result))
+ result))
+
+ (if (and (fix:> n 0)
+ (pair? result)
+ (fix:= (%high-ref high 0) (car result)))
+ (loop 1 (cdr result))
+ (loop 0 result))))
+
+(define (char-set->code-points char-set)
+ (let loop ((ilist (char-set->ilist char-set)) (ranges '()))
+ (if (pair? ilist)
+ (loop (cddr ilist)
+ (cons (make-range (car ilist) (cadr ilist))
+ ranges))
+ (reverse! ranges))))
+\f
+(define (char-set-size char-set)
+ (fix:+ (%low-size (%char-set-low char-set))
+ (%high-size (%char-set-high char-set))))
+
+(define (%low-size low)
+ (let ((low-limit (%low-limit low)))
+
+ (define (find-start i size)
+ (if (fix:< i low-limit)
+ (if (%low-ref low i)
+ (let ((end (find-end (fix:+ i 1))))
+ (find-start end (fix:+ size (fix:- end i))))
+ (find-start (fix:+ i 1) size))
+ size))
+
+ (define (find-end i)
+ (if (fix:< i low-limit)
+ (if (%low-ref low i)
+ (find-end (fix:+ i 1))
+ i)
+ low-limit))
+
+ (find-start 0 0)))
+
+(define (%high-size high)
+ (let ((end (%high-limit high)))
+ (do ((index 0 (fix:+ index 2))
+ (size 0
+ (fix:+ size
+ (fix:- (%high-ref high (fix:+ index 1))
+ (%high-ref high index)))))
+ ((not (fix:< index end)) size))))
(define (char-set-contains? char-set char)
(guarantee char? char 'char-set-contains?)
(loop (fix:+ i 2) upper))
(else #t)))
#f)))))
-
-(define (char-set-table char-set)
- (force (%char-set-table char-set)))
-
-(define (char-set= char-set . char-sets)
- (every (lambda (char-set*)
- (and (bytevector=? (%char-set-low char-set*)
- (%char-set-low char-set))
- (bytevector=? (%char-set-high char-set*)
- (%char-set-high char-set))))
- char-sets))
-
-(define (char-set-empty? cs)
- (and (fix:= 0 (bytevector-length (%char-set-low cs)))
- (fix:= 0 (bytevector-length (%char-set-high cs)))))
-
-(define (char-set-hash char-set #!optional modulus)
- (let ((hash
- (primitive-object-hash-2 (%char-set-low char-set)
- (%char-set-high char-set))))
- (if (default-object? modulus)
- hash
- (begin
- (guarantee positive-fixnum? modulus 'char-set-hash)
- (fix:remainder hash modulus)))))
-
-(define (char-set->code-points char-set)
- (let loop ((ilist (char-set->ilist char-set)) (ranges '()))
- (if (pair? ilist)
- (loop (cddr ilist)
- (cons (make-range (car ilist) (cadr ilist))
- ranges))
- (reverse! ranges))))
-
-(define (char-sets-disjoint? char-set . char-sets)
- (every (lambda (char-set*)
- (char-set-empty? (char-set-intersection char-set char-set*)))
- char-sets))
\f
-;;;; Combinations
+;;;; Algebra
(define (char-set-invert char-set)
(ilist->char-set (ilist-invert (char-set->ilist char-set))))
-(define (ilist-invert ilist)
-
- (define (loop start ilist inverse)
- (if (pair? ilist)
- (loop (cadr ilist)
- (cddr ilist)
- (reverse-ilist-cons start (car ilist) inverse))
- (reverse!
- (if (fix:< start #x110000)
- (reverse-ilist-cons start #x110000 inverse)
- inverse))))
-
- (if (or (not (pair? ilist))
- (fix:< 0 (car ilist)))
- (loop 0 ilist '())
- (loop (cadr ilist) (cddr ilist) '())))
-
(define (char-set-union . char-sets)
(char-set-union* char-sets))
(fold-left ilist-difference
(char-set->ilist char-set)
(map char-set->ilist char-sets))))
-
-(define ilist-union
- (ilist-combiner (lambda (a b) (or a b))))
-
-(define ilist-intersection
- (ilist-combiner (lambda (a b) (and a b))))
-
-(define ilist-difference
- (ilist-combiner (lambda (a b) (and a (not b)))))
\f
;;;; Char-Set Compiler
(values (loop (cdr pattern) '()) #t)
(values (loop pattern '()) #f))))
+(define (char-set-table char-set)
+ (force (%char-set-table char-set)))
+
;;;; Miscellaneous character sets
(define char-ctl?)