From 39226da0b4ba92e8a666d7e6f66ef7fb2a44468d Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Mon, 2 Dec 2019 00:37:01 -0800 Subject: [PATCH] Char-set refactor: SRFI 14 complete. --- src/libraries/srfi-14.scm | 167 +++++++++++++++++++++++++++++++++++++ src/runtime/char-set.scm | 126 +++++++--------------------- src/runtime/mit-macros.scm | 1 + src/runtime/runtime.pkg | 1 + 4 files changed, 201 insertions(+), 94 deletions(-) create mode 100644 src/libraries/srfi-14.scm diff --git a/src/libraries/srfi-14.scm b/src/libraries/srfi-14.scm new file mode 100644 index 000000000..b2c4d172c --- /dev/null +++ b/src/libraries/srfi-14.scm @@ -0,0 +1,167 @@ +#| -*-Scheme-*- + +Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, + 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, + 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, + 2017, 2018 Massachusetts Institute of Technology + +This file is part of MIT/GNU Scheme. + +MIT/GNU Scheme is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at +your option) any later version. + +MIT/GNU Scheme is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with MIT/GNU Scheme; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, +USA. + +|# + +;;;; SRFI 14: Character-set Library + +(define-library (srfi 14) + (import (scheme base) + (scheme char) + (only (srfi 1) + drop-right + last) + (srfi 143) + (only (mit legacy runtime) + ->char-set + char-set->list + char-set->string + char-set-adjoin + char-set-adjoin! + char-set-any + char-set-complement + char-set-complement! + char-set-contains? + char-set-copy + char-set-count + char-set-cursor + char-set-cursor-next + char-set-delete + char-set-delete! + char-set-diff+intersection + char-set-diff+intersection! + char-set-difference + char-set-difference! + char-set-every + char-set-filter + char-set-filter! + char-set-fold + char-set-for-each + char-set-hash + char-set-intersection + char-set-intersection! + char-set-map + char-set-ref + char-set-size + char-set-unfold + char-set-unfold! + char-set-union + char-set-union! + char-set-xor + char-set-xor! + char-set:ascii + char-set:blank + char-set:digit + char-set:empty + char-set:full + char-set:graphic + char-set:hex-digit + char-set:iso-control + char-set:letter + char-set:letter+digit + char-set:lower-case + char-set:printing + char-set:punctuation + char-set:symbol + char-set:title-case + char-set:upper-case + char-set:whitespace + char-set<= + char-set= + char-set? + end-of-char-set? + list->char-set + list->char-set! + string->char-set + string->char-set! + ucs-range->char-set + ucs-range->char-set!)) + (export ->char-set + char-set + char-set->list + char-set->string + char-set-adjoin + char-set-adjoin! + char-set-any + char-set-complement + char-set-complement! + char-set-contains? + char-set-copy + char-set-count + char-set-cursor + char-set-cursor-next + char-set-delete + char-set-delete! + char-set-diff+intersection + char-set-diff+intersection! + char-set-difference + char-set-difference! + char-set-every + char-set-filter + char-set-filter! + char-set-fold + char-set-for-each + char-set-hash + char-set-intersection + char-set-intersection! + char-set-map + char-set-ref + char-set-size + char-set-unfold + char-set-unfold! + char-set-union + char-set-union! + char-set-xor + char-set-xor! + char-set:ascii + char-set:blank + char-set:digit + char-set:empty + char-set:full + char-set:graphic + char-set:hex-digit + char-set:iso-control + char-set:letter + char-set:letter+digit + char-set:lower-case + char-set:printing + char-set:punctuation + char-set:symbol + char-set:title-case + char-set:upper-case + char-set:whitespace + char-set<= + char-set= + char-set? + end-of-char-set? + list->char-set + list->char-set! + string->char-set + string->char-set! + ucs-range->char-set + ucs-range->char-set!) + (begin + (define (char-set . chars) + (list->char-set chars)) + )) \ No newline at end of file diff --git a/src/runtime/char-set.scm b/src/runtime/char-set.scm index f0fe91efe..dcf606f25 100644 --- a/src/runtime/char-set.scm +++ b/src/runtime/char-set.scm @@ -255,6 +255,16 @@ USA. (define-integrable (reverse-ilist-cons start end ilist) (cons end (cons start ilist))) + +(define (ilist= ilist1 ilist2) + (if (and (pair? ilist1) (pair? ilist2)) + (and (fix:= (car ilist1) (car ilist2)) + (ilist= (cdr ilist1) (cdr ilist2))) + (and (null? ilist1) (null? ilist2)))) + +(define (ilist<= ilist1 ilist2) + (ilist= (ilist-union (ilist-difference ilist2 ilist1) ilist1) + ilist2)) (define (ilist-combiner combine) @@ -465,6 +475,16 @@ USA. (cdr char-sets)) #t)) +(define (char-set<= . char-sets) + (if (and (pair? char-sets) + (pair? (cdr char-sets))) + (let loop ((ilists (map char-set->list char-sets))) + (and (ilist<= (car ilists) (cadr ilists)) + (if (pair? (cdr ilists)) + (loop (cdr ilists)) + #t))) + #t)) + (define (char-set-hash char-set #!optional modulus) (let ((hash (primitive-object-hash-2 (%char-set-low char-set) @@ -675,24 +695,8 @@ USA. (char-set->list base-set)) char-set))) -(define (compute-char-set procedure) - - (define (find-start cp end ilist) - (if (fix:< cp end) - (if (procedure cp) - (find-end (fix:+ cp 1) end cp ilist) - (find-start (fix:+ cp 1) end ilist)) - ilist)) - - (define (find-end cp end start ilist) - (if (fix:< cp end) - (if (procedure cp) - (find-end (fix:+ cp 1) end start ilist) - (find-start (fix:+ cp 1) end (ilist-cons cp start ilist))) - (ilist-cons end start ilist))) - - (ilist->char-set - (reverse! (find-start #xE000 #x110000 (find-start 0 #xD800 '()))))) +(define (compute-char-set proc) + (char-set-filter proc char-set:full)) (define (ucs-range->char-set lower upper #!optional error? base-set) (declare (ignore error?)) @@ -717,85 +721,19 @@ USA. (list->string (char-set->list char-set))) (define (char-set->ilist char-set) - (reverse! - (%high->ilist (%char-set-high char-set) - (%low->ilist (%char-set-low char-set))))) - -(define (%low->ilist low) - (let ((low-limit (%low-limit low))) - - (define (find-start i result) - (if (fix:< i low-limit) - (if (%low-ref low i) - (find-end i result) - (find-start (fix:+ i 1) result)) - result)) - - (define (find-end start result) - (let loop ((i (fix:+ start 1))) - (if (fix:< i low-limit) - (if (%low-ref low i) - (loop (fix:+ i 1)) - (find-start i (reverse-ilist-cons start i result))) - (reverse-ilist-cons start low-limit result)))) - - (find-start 0 '()))) - -(define (%high->ilist high result) - (let ((n (%high-limit high))) - - (define (loop i result) - (if (fix:< i n) - (loop (fix:+ i 1) - (cons (%high-ref high i) result)) - result)) - - (if (and (fix:> n 0) - (pair? result) - (fix:= (%high-ref high 0) (car result))) - (loop 1 (cdr result)) - (loop 0 result)))) + (char-set-range-fold-right ilist-cons '() char-set)) (define (char-set->code-points char-set) - (let loop ((ilist (char-set->ilist char-set)) (ranges '())) - (if (pair? ilist) - (loop (cddr ilist) - (cons (make-range (car ilist) (cadr ilist)) - ranges)) - (reverse! ranges)))) - + (char-set-range-fold-right (lambda (start end cpl) + (cons (make-range start end) cpl)) + '() + char-set)) + (define (char-set-size char-set) - (fix:+ (%low-size (%char-set-low char-set)) - (%high-size (%char-set-high char-set)))) - -(define (%low-size low) - (let ((low-limit (%low-limit low))) - - (define (find-start i size) - (if (fix:< i low-limit) - (if (%low-ref low i) - (let ((end (find-end (fix:+ i 1)))) - (find-start end (fix:+ size (fix:- end i)))) - (find-start (fix:+ i 1) size)) - size)) - - (define (find-end i) - (if (fix:< i low-limit) - (if (%low-ref low i) - (find-end (fix:+ i 1)) - i) - low-limit)) - - (find-start 0 0))) - -(define (%high-size high) - (let ((end (%high-limit high))) - (do ((index 0 (fix:+ index 2)) - (size 0 - (fix:+ size - (fix:- (%high-ref high (fix:+ index 1)) - (%high-ref high index))))) - ((not (fix:< index end)) size)))) + (char-set-range-fold (lambda (start end size) + (fix:+ (fix:- end start) size)) + 0 + char-set)) (define (char-set-count pred char-set) (char-set-fold-right (lambda (char count) diff --git a/src/runtime/mit-macros.scm b/src/runtime/mit-macros.scm index 5ecbbdcaa..955d5eaa1 100644 --- a/src/runtime/mit-macros.scm +++ b/src/runtime/mit-macros.scm @@ -794,6 +794,7 @@ USA. (define-feature 'srfi-6 always) ;Basic String Ports (define-feature 'srfi-8 always) ;RECEIVE (define-feature 'srfi-9 always) ;DEFINE-RECORD-TYPE +(define-feature 'srfi-14 always) ;Character-set Library (define-feature 'srfi-23 always) ;ERROR (define-feature 'srfi-27 always) ;Sources of Random Bits (define-feature 'srfi-30 always) ;Nested Multi-Line Comments (#| ... |#) diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index 3c0bf41e6..773349ac4 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -1682,6 +1682,7 @@ USA. char-set:hex-digit ;SRFI 14 char-set:iso-control ;SRFI 14 char-set:wsp + char-set<= ;SRFI 14 char-set= ;SRFI 14 char-set? ;SRFI 14 char-sets-disjoint? -- 2.25.1