Add character sets needed for SRFI 115.
authorChris Hanson <org/chris-hanson/cph>
Mon, 25 Nov 2019 21:39:17 +0000 (13:39 -0800)
committerChris Hanson <org/chris-hanson/cph>
Wed, 27 Nov 2019 08:34:44 +0000 (00:34 -0800)
src/runtime/char-set.scm
src/runtime/runtime.pkg
src/runtime/ucd-glue.scm

index 6ddc12f84f9a0cc1d2b1c71bfbdeee4275971cb1..a91e0317026de94c1cc9340ee9334bdf1d6a0602 100644 (file)
@@ -370,14 +370,23 @@ USA.
 
 (define (name->char-set name)
   (case name
-    ((alphabetic) char-set:alphabetic)
-    ((alphanumeric) char-set:alphanumeric)
+    ((alphabetic alpha) char-set:alphabetic)
+    ((alphanumeric alphanum alnum) char-set:alphanumeric)
+    ((ascii) char-set:ascii)
     ((cased) char-set:cased)
-    ((lower-case) char-set:lower-case)
-    ((numeric) char-set:numeric)
-    ((unicode) char-set:unicode)
-    ((upper-case) char-set:upper-case)
-    ((whitespace) char-set:whitespace)
+    ((control cntrl) char-set:control)
+    ((graphic graph) char-set:graphic)
+    ((hex-digit xdigit) char-set:hex-digit)
+    ((lower-case lower) char-set:lower-case)
+    ((no-newline nonl) char-set:no-newline)
+    ((numeric num) char-set:numeric)
+    ((printing print) char-set:printing)
+    ((punctuation punct) char-set:punctuation)
+    ((symbol) char-set:symbol)
+    ((title-case title) char-set:title-case)
+    ((unicode any) char-set:unicode)
+    ((upper-case upper) char-set:upper-case)
+    ((whitespace white space) char-set:whitespace)
     (else #f)))
 
 (define (%range? object)
@@ -575,29 +584,17 @@ USA.
 ;;;; Miscellaneous character sets
 
 (define char-ctl?)
-(define char-graphic?)
 (define char-set:ascii)
 (define char-set:ctls)
-(define char-set:graphic)
+(define char-set:hex-digit)
 (define char-set:newline)
-(define char-set:not-graphic)
-(define char-set:not-standard)
-(define char-set:standard)
 (define char-set:wsp)
-(define char-standard?)
 (define char-wsp?)
 (add-boot-init!
  (lambda ()
-   (set! char-set:graphic (%inversion-list->char-set '(#x20 #x7F #xA0 #x100)))
-   (set! char-set:not-graphic (char-set-invert char-set:graphic))
-   (set! char-graphic? (char-set-predicate char-set:graphic))
-
-   (set! char-set:standard
-        (char-set-union char-set:graphic (char-set #\newline)))
-   (set! char-set:not-standard (char-set-invert char-set:standard))
-   (set! char-standard? (char-set-predicate char-set:standard))
 
    (set! char-set:newline (char-set #\newline))
+   (set! char-set:hex-digit (char-set "0123456789abcdefABCDEF"))
 
    ;; Used in RFCs:
 
index ca56a8b528851d22f14eafd0f917d3cc479433ee..72abc6f271e4ccc6d0542fe7ef8ebc551ba9cd1f 100644 (file)
@@ -1494,6 +1494,7 @@ USA.
   (parent (runtime))
   (export ()
          (char-set:numeric char-set:nt=decimal)
+         (char-set:title-case char-set:gc=letter:titlecase)
          (char-numeric? char-nt=decimal?)
          char-alphabetic?
          char-cased?
@@ -1530,6 +1531,8 @@ USA.
          char-set:gc=number:decimal-digit
          char-set:gc=number:letter
          char-set:gc=number:other
+         char-set:gc=other:control
+         char-set:gc=other:format
          char-set:gc=other:not-assigned
          char-set:gc=other:private-use
          char-set:gc=other:surrogate
@@ -1567,14 +1570,27 @@ USA.
   (parent (runtime))
   (export ()
          char-alphanumeric?
+         char-graphic?
+         char-printing?
          char-set:alphanumeric
+         char-set:control
+         char-set:graphic
+         char-set:no-newline
          char-set:not-alphabetic
          char-set:not-alphanumeric
+         char-set:not-graphic
          char-set:not-lower-case
          char-set:not-numeric
+         char-set:not-printing
+         char-set:not-standard
          char-set:not-upper-case
          char-set:not-whitespace
+         char-set:printing
+         char-set:punctuation
+         char-set:standard
+         char-set:symbol
          char-set:unicode
+         char-standard?
          unicode-char?)
   (export (runtime)
          char-set:folded-symbol-constituent
@@ -1596,7 +1612,6 @@ USA.
          8-bit-char-set?
          ascii-range->char-set
          char-ctl?
-         char-graphic?
          char-in-set?
          char-set
          char-set*
@@ -1613,16 +1628,12 @@ USA.
          char-set-union*
          char-set:ascii
          char-set:ctls
-         char-set:graphic
+         char-set:hex-digit
          char-set:newline
-         char-set:not-graphic
-         char-set:not-standard
-         char-set:standard
          char-set:wsp
          char-set=?
          char-set?
          char-sets-disjoint?
-         char-standard?
          char-wsp?
          code-point-list?
          code-point-in-char-set?
index 6f79d5a97a2c6768b400024ff4a18fb1f1252247..d9e591479f82307b3ae13871c58243e7aa7a9d23 100644 (file)
@@ -35,6 +35,13 @@ USA.
 (define-deferred char-alphanumeric?
   (char-set-predicate char-set:alphanumeric))
 
+(define-deferred char-set:control
+  (char-set-union char-set:gc=other:control
+                 char-set:gc=other:format
+                 char-set:gc=other:surrogate
+                 char-set:gc=other:private-use
+                 char-set:gc=other:not-assigned))
+
 (define-deferred char-set:not-alphabetic
   (char-set-invert char-set:alphabetic))
 
@@ -53,6 +60,21 @@ USA.
 (define-deferred char-set:not-whitespace
   (char-set-invert char-set:whitespace))
 
+(define-deferred char-set:punctuation
+  (char-set-union char-set:gc=punctuation:connector
+                 char-set:gc=punctuation:dash
+                 char-set:gc=punctuation:open
+                 char-set:gc=punctuation:close
+                 char-set:gc=punctuation:initial-quote
+                 char-set:gc=punctuation:final-quote
+                 char-set:gc=punctuation:other))
+
+(define-deferred char-set:symbol
+  (char-set-union char-set:gc=symbol:math
+                 char-set:gc=symbol:currency
+                 char-set:gc=symbol:modifier
+                 char-set:gc=symbol:other))
+
 (define-deferred char-set:unicode
   (char-set-difference (char-set-invert (char-set))
                       char-set:gc=other:surrogate
@@ -60,6 +82,27 @@ USA.
 
 (define-deferred unicode-char?
   (char-set-predicate char-set:unicode))
+
+(define-deferred char-set:graphic
+  (char-set-union char-set:alphabetic
+                 char-set:punctuation
+                 char-set:symbol))
+(define-deferred char-set:not-graphic (char-set-invert char-set:graphic))
+(define-deferred char-graphic? (char-set-predicate char-set:graphic))
+
+(define-deferred char-set:no-newline
+  (char-set-difference char-set:unicode (char-set #\newline #\return)))
+
+(define-deferred char-set:printing
+  (char-set-union char-set:graphic
+                 char-set:whitespace))
+(define-deferred char-set:not-printing (char-set-invert char-set:printing))
+(define-deferred char-printing? (char-set-predicate char-set:printing))
+
+(define-deferred char-set:standard
+  (char-set-union char-set:graphic (char-set #\newline)))
+(define-deferred char-set:not-standard (char-set-invert char-set:standard))
+(define-deferred char-standard? (char-set-predicate char-set:standard))
 \f
 ;;;; Scheme language: