(loop ranges)))))))
ranges))
+(define (string->char-set string)
+ (char-set* (map char->integer (string->list string))))
+
(define (compute-char-set procedure)
(define (find-start cp end ilist)
(scons end start ilist)))
(%inversion-list->char-set
- (reverse! (find-start #xE000 #x110000
- (find-start 0 #xD800 '())))))
+ (reverse! (find-start #xE000 #x110000 (find-start 0 #xD800 '())))))
\f
;;;; Code-point lists
\f
;;;; Accessors
-(define (char-in-set? char char-set)
- (guarantee char? char 'char-in-set?)
+(define (char-set-contains? char-set char)
+ (guarantee char? char 'char-set-contains?)
(%code-point-in-char-set? (char-code char) char-set))
+(define (char-in-set? char char-set)
+ (char-set-contains? char-set char))
+
(define (code-point-in-char-set? cp char-set)
(guarantee unicode-code-point? cp 'code-point-in-char-set?)
(%code-point-in-char-set? cp char-set))
(define (char-set-table char-set)
(force (%char-set-table char-set)))
-(define (char-set=? char-set . char-sets)
+(define (char-set= char-set . char-sets)
(every (lambda (char-set*)
(and (bytevector=? (%char-set-low char-set*)
(%char-set-low char-set))
(and (fix:= 0 (bytevector-length (%char-set-low cs)))
(fix:= 0 (bytevector-length (%char-set-high cs)))))
-(define (char-set-hash char-set)
- (primitive-object-hash-2 (%char-set-low char-set)
- (%char-set-high char-set)))
+(define (char-set-hash char-set #!optional modulus)
+ (let ((get-hash
+ (lambda ()
+ (primitive-object-hash-2 (%char-set-low char-set)
+ (%char-set-high char-set)))))
+ (if (default-object? modulus)
+ (get-hash char-set)
+ (begin
+ (guarantee positive-fixnum? modulus 'char-set-hash)
+ (fix:remainder (get-hash char-set) modulus)))))
(define (char-set->code-points char-set)
(let loop ((ilist (%char-set->inversion-list char-set)) (ranges '()))
\f
;;;; Combinations
-(define (char-set-invert char-set)
+(define (char-set-complement char-set)
(%inversion-list->char-set
(inversion-list-invert (%char-set->inversion-list char-set))))
(re-char-pattern->code-points pattern)
(let ((char-set (char-set* scalar-values)))
(if (if negate? (not negate?*) negate?*)
- (char-set-invert char-set)
+ (char-set-complement char-set)
char-set))))
(define (re-char-pattern->code-points pattern)
(define char-ctl?)
(define char-set:ascii)
+(define char-set:blank)
(define char-set:ctls)
+(define char-set:empty)
(define char-set:hex-digit)
+(define char-set:iso-control)
(define char-set:wsp)
(define char-wsp?)
(add-boot-init!
(lambda ()
+ (set! char-set:blank (char-set #\space #\tab))
+ (set! char-set:empty (char-set))
(set! char-set:hex-digit (char-set "0123456789abcdefABCDEF"))
+ (set! char-set:iso-control
+ (%inversion-list->char-set '(#x00 #x20 #x7F #x80)))
;; Used in RFCs:
\f
;;;; Backwards compatibility
-(define (char-set-member? char-set char)
- (char-in-set? char char-set))
-
-(define (string->char-set string)
- (char-set* (map char->integer (string->list string))))
-
;; Returns ASCII string:
(define (char-set->string char-set)
(list->string (char-set-members char-set)))
"ucd-table-wspace")
(parent (runtime))
(export ()
+ (char-set:digit char-set:nt=decimal) ;SRFI 14
+ (char-set:letter char-set:alphabetic) ;SRFI 14
(char-set:numeric char-set:nt=decimal)
- (char-set:title-case char-set:gc=letter:titlecase)
+ (char-set:title-case char-set:gc=letter:titlecase) ;SRFI 14
(char-numeric? char-nt=decimal?)
char-alphabetic?
char-cased?
char-set:changes-when-case-folded
char-set:changes-when-lower-cased
char-set:changes-when-upper-cased
- char-set:lower-case
- char-set:upper-case
- char-set:whitespace
+ char-set:lower-case ;SRFI 14
+ char-set:upper-case ;SRFI 14
+ char-set:whitespace ;SRFI 14
char-upper-case?
char-whitespace?)
(export (runtime character)
(files "ucd-glue")
(parent (runtime))
(export ()
+ (char-set:full char-set:unicode) ;SRFI 14
+ (char-set:letter+digit char-set:alphanumeric) ;SRFI 14
char-alphanumeric?
char-graphic?
char-newline?
char-printing?
char-set:alphanumeric
char-set:control
- char-set:graphic
+ char-set:graphic ;SRFI 14
char-set:newline
char-set:no-newline
char-set:not-alphabetic
char-set:not-standard
char-set:not-upper-case
char-set:not-whitespace
- char-set:printing
- char-set:punctuation
- char-set:standard
- char-set:symbol
+ char-set:printing ;SRFI 14
+ char-set:punctuation ;SRFI 14
+ char-set:standard ;SRFI 14
+ char-set:symbol ;SRFI 14
char-set:unicode
char-standard?
unicode-char?)
(chars->char-set char-set*)
(scalar-values->char-set char-set*)
(well-formed-scalar-value-list? code-point-list?)
- char-set-member?)
+ (char-set-member? char-set-contains?)
+ char-set-members)
(export ()
+ (char-set-invert char-set-complement)
+ (char-set=? char-set=)
8-bit-char-set?
ascii-range->char-set
char-ctl?
char-in-set?
- char-set
+ char-set ;SRFI 14
char-set*
char-set->code-points
- char-set-difference
+ char-set-complement ;SRFI 14
+ char-set-contains? ;SRFI 14
+ char-set-difference ;SRFI 14
char-set-empty?
- char-set-hash
- char-set-intersection
+ char-set-hash ;SRFI 14
+ char-set-intersection ;SRFI 14
char-set-intersection*
- char-set-invert
- char-set-members
char-set-predicate
- char-set-size
- char-set-union
+ char-set-size ;SRFI 14
+ char-set-union ;SRFI 14
char-set-union*
- char-set:ascii
+ char-set:ascii ;SRFI 14
+ char-set:blank ;SRFI 14
char-set:ctls
- char-set:hex-digit
+ char-set:empty ;SRFI 14
+ char-set:hex-digit ;SRFI 14
+ char-set:iso-control ;SRFI 14
char-set:wsp
- char-set=?
- char-set?
+ char-set= ;SRFI 14
+ char-set? ;SRFI 14
char-sets-disjoint?
char-wsp?
code-point-list?
compute-char-set
re-char-pattern->code-points
re-compile-char-set
- string->char-set)
+ string->char-set ;SRFI 14
+ )
(export (runtime regexp regsexp)
cpl-element?
normalize-ranges))