From ac06b090340227f8df3c2d7d99f49698fe579555 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Mon, 25 Nov 2019 13:39:17 -0800 Subject: [PATCH] Add character sets needed for SRFI 115. --- src/runtime/char-set.scm | 39 +++++++++++++++++------------------- src/runtime/runtime.pkg | 23 +++++++++++++++------ src/runtime/ucd-glue.scm | 43 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 27 deletions(-) diff --git a/src/runtime/char-set.scm b/src/runtime/char-set.scm index 6ddc12f84..a91e03170 100644 --- a/src/runtime/char-set.scm +++ b/src/runtime/char-set.scm @@ -370,14 +370,23 @@ USA. (define (name->char-set name) (case name - ((alphabetic) char-set:alphabetic) - ((alphanumeric) char-set:alphanumeric) + ((alphabetic alpha) char-set:alphabetic) + ((alphanumeric alphanum alnum) char-set:alphanumeric) + ((ascii) char-set:ascii) ((cased) char-set:cased) - ((lower-case) char-set:lower-case) - ((numeric) char-set:numeric) - ((unicode) char-set:unicode) - ((upper-case) char-set:upper-case) - ((whitespace) char-set:whitespace) + ((control cntrl) char-set:control) + ((graphic graph) char-set:graphic) + ((hex-digit xdigit) char-set:hex-digit) + ((lower-case lower) char-set:lower-case) + ((no-newline nonl) char-set:no-newline) + ((numeric num) char-set:numeric) + ((printing print) char-set:printing) + ((punctuation punct) char-set:punctuation) + ((symbol) char-set:symbol) + ((title-case title) char-set:title-case) + ((unicode any) char-set:unicode) + ((upper-case upper) char-set:upper-case) + ((whitespace white space) char-set:whitespace) (else #f))) (define (%range? object) @@ -575,29 +584,17 @@ USA. ;;;; Miscellaneous character sets (define char-ctl?) -(define char-graphic?) (define char-set:ascii) (define char-set:ctls) -(define char-set:graphic) +(define char-set:hex-digit) (define char-set:newline) -(define char-set:not-graphic) -(define char-set:not-standard) -(define char-set:standard) (define char-set:wsp) -(define char-standard?) (define char-wsp?) (add-boot-init! (lambda () - (set! char-set:graphic (%inversion-list->char-set '(#x20 #x7F #xA0 #x100))) - (set! char-set:not-graphic (char-set-invert char-set:graphic)) - (set! char-graphic? (char-set-predicate char-set:graphic)) - - (set! char-set:standard - (char-set-union char-set:graphic (char-set #\newline))) - (set! char-set:not-standard (char-set-invert char-set:standard)) - (set! char-standard? (char-set-predicate char-set:standard)) (set! char-set:newline (char-set #\newline)) + (set! char-set:hex-digit (char-set "0123456789abcdefABCDEF")) ;; Used in RFCs: diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index ca56a8b52..72abc6f27 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -1494,6 +1494,7 @@ USA. (parent (runtime)) (export () (char-set:numeric char-set:nt=decimal) + (char-set:title-case char-set:gc=letter:titlecase) (char-numeric? char-nt=decimal?) char-alphabetic? char-cased? @@ -1530,6 +1531,8 @@ USA. char-set:gc=number:decimal-digit char-set:gc=number:letter char-set:gc=number:other + char-set:gc=other:control + char-set:gc=other:format char-set:gc=other:not-assigned char-set:gc=other:private-use char-set:gc=other:surrogate @@ -1567,14 +1570,27 @@ USA. (parent (runtime)) (export () char-alphanumeric? + char-graphic? + char-printing? char-set:alphanumeric + char-set:control + char-set:graphic + char-set:no-newline char-set:not-alphabetic char-set:not-alphanumeric + char-set:not-graphic char-set:not-lower-case char-set:not-numeric + char-set:not-printing + char-set:not-standard char-set:not-upper-case char-set:not-whitespace + char-set:printing + char-set:punctuation + char-set:standard + char-set:symbol char-set:unicode + char-standard? unicode-char?) (export (runtime) char-set:folded-symbol-constituent @@ -1596,7 +1612,6 @@ USA. 8-bit-char-set? ascii-range->char-set char-ctl? - char-graphic? char-in-set? char-set char-set* @@ -1613,16 +1628,12 @@ USA. char-set-union* char-set:ascii char-set:ctls - char-set:graphic + char-set:hex-digit char-set:newline - char-set:not-graphic - char-set:not-standard - char-set:standard char-set:wsp char-set=? char-set? char-sets-disjoint? - char-standard? char-wsp? code-point-list? code-point-in-char-set? diff --git a/src/runtime/ucd-glue.scm b/src/runtime/ucd-glue.scm index 6f79d5a97..d9e591479 100644 --- a/src/runtime/ucd-glue.scm +++ b/src/runtime/ucd-glue.scm @@ -35,6 +35,13 @@ USA. (define-deferred char-alphanumeric? (char-set-predicate char-set:alphanumeric)) +(define-deferred char-set:control + (char-set-union char-set:gc=other:control + char-set:gc=other:format + char-set:gc=other:surrogate + char-set:gc=other:private-use + char-set:gc=other:not-assigned)) + (define-deferred char-set:not-alphabetic (char-set-invert char-set:alphabetic)) @@ -53,6 +60,21 @@ USA. (define-deferred char-set:not-whitespace (char-set-invert char-set:whitespace)) +(define-deferred char-set:punctuation + (char-set-union char-set:gc=punctuation:connector + char-set:gc=punctuation:dash + char-set:gc=punctuation:open + char-set:gc=punctuation:close + char-set:gc=punctuation:initial-quote + char-set:gc=punctuation:final-quote + char-set:gc=punctuation:other)) + +(define-deferred char-set:symbol + (char-set-union char-set:gc=symbol:math + char-set:gc=symbol:currency + char-set:gc=symbol:modifier + char-set:gc=symbol:other)) + (define-deferred char-set:unicode (char-set-difference (char-set-invert (char-set)) char-set:gc=other:surrogate @@ -60,6 +82,27 @@ USA. (define-deferred unicode-char? (char-set-predicate char-set:unicode)) + +(define-deferred char-set:graphic + (char-set-union char-set:alphabetic + char-set:punctuation + char-set:symbol)) +(define-deferred char-set:not-graphic (char-set-invert char-set:graphic)) +(define-deferred char-graphic? (char-set-predicate char-set:graphic)) + +(define-deferred char-set:no-newline + (char-set-difference char-set:unicode (char-set #\newline #\return))) + +(define-deferred char-set:printing + (char-set-union char-set:graphic + char-set:whitespace)) +(define-deferred char-set:not-printing (char-set-invert char-set:printing)) +(define-deferred char-printing? (char-set-predicate char-set:printing)) + +(define-deferred char-set:standard + (char-set-union char-set:graphic (char-set #\newline))) +(define-deferred char-set:not-standard (char-set-invert char-set:standard)) +(define-deferred char-standard? (char-set-predicate char-set:standard)) ;;;; Scheme language: -- 2.25.1