From 5a6eb167435c3e6eb72e5f8281bd31dbcea1479b Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Fri, 29 Nov 2019 22:01:06 -0800 Subject: [PATCH] Markup and tweaks for partial SRFI 14 support. --- src/runtime/char-set.scm | 45 +++++++++++++++++++----------- src/runtime/hash-table.scm | 1 + src/runtime/runtime.pkg | 57 +++++++++++++++++++++++--------------- 3 files changed, 64 insertions(+), 39 deletions(-) diff --git a/src/runtime/char-set.scm b/src/runtime/char-set.scm index efba94f88..5952592fa 100644 --- a/src/runtime/char-set.scm +++ b/src/runtime/char-set.scm @@ -369,6 +369,9 @@ USA. (loop ranges))))))) ranges)) +(define (string->char-set string) + (char-set* (map char->integer (string->list string)))) + (define (compute-char-set procedure) (define (find-start cp end ilist) @@ -386,8 +389,7 @@ USA. (scons end start ilist))) (%inversion-list->char-set - (reverse! (find-start #xE000 #x110000 - (find-start 0 #xD800 '()))))) + (reverse! (find-start #xE000 #x110000 (find-start 0 #xD800 '()))))) ;;;; Code-point lists @@ -465,10 +467,13 @@ USA. ;;;; Accessors -(define (char-in-set? char char-set) - (guarantee char? char 'char-in-set?) +(define (char-set-contains? char-set char) + (guarantee char? char 'char-set-contains?) (%code-point-in-char-set? (char-code char) char-set)) +(define (char-in-set? char char-set) + (char-set-contains? char-set char)) + (define (code-point-in-char-set? cp char-set) (guarantee unicode-code-point? cp 'code-point-in-char-set?) (%code-point-in-char-set? cp char-set)) @@ -490,7 +495,7 @@ USA. (define (char-set-table char-set) (force (%char-set-table char-set))) -(define (char-set=? char-set . char-sets) +(define (char-set= char-set . char-sets) (every (lambda (char-set*) (and (bytevector=? (%char-set-low char-set*) (%char-set-low char-set)) @@ -502,9 +507,16 @@ USA. (and (fix:= 0 (bytevector-length (%char-set-low cs))) (fix:= 0 (bytevector-length (%char-set-high cs))))) -(define (char-set-hash char-set) - (primitive-object-hash-2 (%char-set-low char-set) - (%char-set-high char-set))) +(define (char-set-hash char-set #!optional modulus) + (let ((get-hash + (lambda () + (primitive-object-hash-2 (%char-set-low char-set) + (%char-set-high char-set))))) + (if (default-object? modulus) + (get-hash char-set) + (begin + (guarantee positive-fixnum? modulus 'char-set-hash) + (fix:remainder (get-hash char-set) modulus))))) (define (char-set->code-points char-set) (let loop ((ilist (%char-set->inversion-list char-set)) (ranges '())) @@ -521,7 +533,7 @@ USA. ;;;; Combinations -(define (char-set-invert char-set) +(define (char-set-complement char-set) (%inversion-list->char-set (inversion-list-invert (%char-set->inversion-list char-set)))) @@ -591,7 +603,7 @@ USA. (re-char-pattern->code-points pattern) (let ((char-set (char-set* scalar-values))) (if (if negate? (not negate?*) negate?*) - (char-set-invert char-set) + (char-set-complement char-set) char-set)))) (define (re-char-pattern->code-points pattern) @@ -619,13 +631,20 @@ USA. (define char-ctl?) (define char-set:ascii) +(define char-set:blank) (define char-set:ctls) +(define char-set:empty) (define char-set:hex-digit) +(define char-set:iso-control) (define char-set:wsp) (define char-wsp?) (add-boot-init! (lambda () + (set! char-set:blank (char-set #\space #\tab)) + (set! char-set:empty (char-set)) (set! char-set:hex-digit (char-set "0123456789abcdefABCDEF")) + (set! char-set:iso-control + (%inversion-list->char-set '(#x00 #x20 #x7F #x80))) ;; Used in RFCs: @@ -641,12 +660,6 @@ USA. ;;;; Backwards compatibility -(define (char-set-member? char-set char) - (char-in-set? char char-set)) - -(define (string->char-set string) - (char-set* (map char->integer (string->list string)))) - ;; Returns ASCII string: (define (char-set->string char-set) (list->string (char-set-members char-set))) diff --git a/src/runtime/hash-table.scm b/src/runtime/hash-table.scm index 696e7c67c..5289754b5 100644 --- a/src/runtime/hash-table.scm +++ b/src/runtime/hash-table.scm @@ -1375,6 +1375,7 @@ USA. (set-equality-predicate-properties! string=? string-hash #f) (set-equality-predicate-properties! string-ci=? string-ci-hash #f) (set-equality-predicate-properties! int:= int:modulo #f) + (set-equality-predicate-properties! char-set= char-set-hash #f) (register-predicate! equality-predicate? 'equality-predicate))) (define (equality-predicate-keylist equality-predicate) diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index 43d545f74..21aa4cce3 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -1493,8 +1493,10 @@ USA. "ucd-table-wspace") (parent (runtime)) (export () + (char-set:digit char-set:nt=decimal) ;SRFI 14 + (char-set:letter char-set:alphabetic) ;SRFI 14 (char-set:numeric char-set:nt=decimal) - (char-set:title-case char-set:gc=letter:titlecase) + (char-set:title-case char-set:gc=letter:titlecase) ;SRFI 14 (char-numeric? char-nt=decimal?) char-alphabetic? char-cased? @@ -1507,9 +1509,9 @@ USA. char-set:changes-when-case-folded char-set:changes-when-lower-cased char-set:changes-when-upper-cased - char-set:lower-case - char-set:upper-case - char-set:whitespace + char-set:lower-case ;SRFI 14 + char-set:upper-case ;SRFI 14 + char-set:whitespace ;SRFI 14 char-upper-case? char-whitespace?) (export (runtime character) @@ -1569,13 +1571,15 @@ USA. (files "ucd-glue") (parent (runtime)) (export () + (char-set:full char-set:unicode) ;SRFI 14 + (char-set:letter+digit char-set:alphanumeric) ;SRFI 14 char-alphanumeric? char-graphic? char-newline? char-printing? char-set:alphanumeric char-set:control - char-set:graphic + char-set:graphic ;SRFI 14 char-set:newline char-set:no-newline char-set:not-alphabetic @@ -1587,10 +1591,10 @@ USA. char-set:not-standard char-set:not-upper-case char-set:not-whitespace - char-set:printing - char-set:punctuation - char-set:standard - char-set:symbol + char-set:printing ;SRFI 14 + char-set:punctuation ;SRFI 14 + char-set:standard ;SRFI 14 + char-set:symbol ;SRFI 14 char-set:unicode char-standard? unicode-char?) @@ -1609,32 +1613,38 @@ USA. (chars->char-set char-set*) (scalar-values->char-set char-set*) (well-formed-scalar-value-list? code-point-list?) - char-set-member?) + (char-set-member? char-set-contains?) + char-set-members) (export () + (char-set-invert char-set-complement) + (char-set=? char-set=) 8-bit-char-set? ascii-range->char-set char-ctl? char-in-set? - char-set + char-set ;SRFI 14 char-set* char-set->code-points - char-set-difference + char-set-complement ;SRFI 14 + char-set-contains? ;SRFI 14 + char-set-difference ;SRFI 14 char-set-empty? - char-set-hash - char-set-intersection + char-set-hash ;SRFI 14 + char-set-intersection ;SRFI 14 char-set-intersection* - char-set-invert - char-set-members char-set-predicate - char-set-size - char-set-union + char-set-size ;SRFI 14 + char-set-union ;SRFI 14 char-set-union* - char-set:ascii + char-set:ascii ;SRFI 14 + char-set:blank ;SRFI 14 char-set:ctls - char-set:hex-digit + char-set:empty ;SRFI 14 + char-set:hex-digit ;SRFI 14 + char-set:iso-control ;SRFI 14 char-set:wsp - char-set=? - char-set? + char-set= ;SRFI 14 + char-set? ;SRFI 14 char-sets-disjoint? char-wsp? code-point-list? @@ -1642,7 +1652,8 @@ USA. compute-char-set re-char-pattern->code-points re-compile-char-set - string->char-set) + string->char-set ;SRFI 14 + ) (export (runtime regexp regsexp) cpl-element? normalize-ranges)) -- 2.25.1