From: Chris Hanson Date: Fri, 15 Jun 2001 20:38:51 +0000 (+0000) Subject: Change representation of character sets so that they do not satisfy X-Git-Tag: 20090517-FFI~2710 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=0448306bb17c88a00ab7b6ba5f154048e81d3dd9;p=mit-scheme.git Change representation of character sets so that they do not satisfy STRING?. Unfortunately, this requires a complete recompilation of the system, since the primitives SUBSTRING-FIND-NEXT-CHAR-IN-SET and SUBSTRING-FIND-PREVIOUS-CHAR-IN-SET were being hard-coded by the syntaxer. --- diff --git a/v7/src/runtime/chrset.scm b/v7/src/runtime/chrset.scm index 5fe18e98f..669d4c3c7 100644 --- a/v7/src/runtime/chrset.scm +++ b/v7/src/runtime/chrset.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: chrset.scm,v 14.12 2001/02/05 19:20:12 cph Exp $ +$Id: chrset.scm,v 14.13 2001/06/15 20:38:37 cph Exp $ Copyright (c) 1988-2001 Massachusetts Institute of Technology @@ -16,7 +16,8 @@ General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. |# ;;;; Character Sets @@ -24,68 +25,71 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. (declare (usual-integrations)) -(define (char-set? object) - (and (string? object) - (fix:= (string-length object) 256) - (not (string-find-next-char-in-set object char-set:not-01)))) +(define-structure (char-set (type-descriptor char-set-rtd)) + (table #f read-only #t)) -(define (guarantee-char-set object procedure) - (if (not (char-set? object)) - (error:wrong-type-argument object "character set" procedure))) +(define-integrable char-set-table-length 256) (define (char-set . chars) (chars->char-set chars)) (define (chars->char-set chars) - (let ((char-set (string-allocate 256))) - (vector-8b-fill! char-set 0 256 0) - (for-each - (lambda (char) - (vector-8b-set! char-set - (let ((code (char->integer char))) - (if (fix:>= code (string-length char-set)) - (error:bad-range-argument chars 'CHARS->CHAR-SET)) - code) - 1)) - chars) - char-set)) + (let ((table (make-string char-set-table-length))) + (vector-8b-fill! table 0 char-set-table-length 0) + (do ((chars chars (cdr chars))) + ((not (pair? chars))) + (vector-8b-set! table + (let ((code (char->integer (car chars)))) + (if (fix:>= code char-set-table-length) + (error:bad-range-argument chars 'CHARS->CHAR-SET)) + code) + 1)) + (make-char-set table))) (define (string->char-set string) - (let ((char-set (string-allocate 256))) - (vector-8b-fill! char-set 0 256 0) + (let ((table (make-string char-set-table-length))) + (vector-8b-fill! table 0 char-set-table-length 0) (do ((i (fix:- (string-length string) 1) (fix:- i 1))) ((fix:< i 0)) - (vector-8b-set! char-set (vector-8b-ref string i) 1)) - char-set)) + (vector-8b-set! table (vector-8b-ref string i) 1)) + (make-char-set table))) (define (ascii-range->char-set lower upper) - (let ((char-set (string-allocate 256))) - (vector-8b-fill! char-set 0 lower 0) - (vector-8b-fill! char-set lower upper 1) - (vector-8b-fill! char-set upper 256 0) - char-set)) + (let ((table (make-string char-set-table-length))) + (vector-8b-fill! table 0 lower 0) + (vector-8b-fill! table lower upper 1) + (vector-8b-fill! table upper char-set-table-length 0) + (make-char-set table))) (define (predicate->char-set predicate) - (let ((char-set (string-allocate 256))) + (let ((table (make-string char-set-table-length))) (let loop ((code 0)) - (if (fix:< code 256) - (begin (vector-8b-set! char-set code - (if (predicate (integer->char code)) 1 0)) - (loop (fix:+ code 1))))) - char-set)) + (if (fix:< code char-set-table-length) + (begin + (vector-8b-set! table + code + (if (predicate (integer->char code)) 1 0)) + (loop (fix:+ code 1))))) + (make-char-set table))) (define (char-set-members char-set) - (guarantee-char-set char-set 'CHAR-SET-MEMBERS) - (let loop ((code 0)) - (cond ((fix:>= code 256) '()) - ((fix:zero? (vector-8b-ref char-set code)) (loop (fix:+ code 1))) - (else (cons (integer->char code) (loop (fix:+ code 1))))))) + (if (not (char-set? char-set)) + (error:wrong-type-argument char-set "character set" 'CHAR-SET-MEMBERS)) + (let ((table (char-set-table char-set))) + (let loop ((code char-set-table-length) (chars '())) + (if (fix:< 0 code) + (loop (fix:- code 1) + (if (fix:zero? (vector-8b-ref table (fix:- code 1))) + chars + (cons (integer->char (fix:- code 1)) chars))) + chars)))) (define (char-set-member? char-set char) - (guarantee-char-set char-set 'CHAR-SET-MEMBER?) + (if (not (char-set? char-set)) + (error:wrong-type-argument char-set "character set" 'CHAR-SET-MEMBER?)) (let ((code (char->integer char))) - (and (fix:< code (string-length char-set)) - (not (fix:zero? (vector-8b-ref char-set code)))))) + (and (fix:< code char-set-table-length) + (not (fix:zero? (vector-8b-ref (char-set-table char-set) code)))))) (define (char-set-invert char-set) (predicate->char-set @@ -165,13 +169,16 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. unspecific) (define-integrable (char-upper-case? char) - (char-set-member? char-set:upper-case char)) + (and (fix:<= (char->integer #\A) char) + (fix:<= char (char->integer #\Z)))) (define-integrable (char-lower-case? char) - (char-set-member? char-set:lower-case char)) + (and (fix:<= (char->integer #\a) char) + (fix:<= char (char->integer #\z)))) (define-integrable (char-numeric? char) - (char-set-member? char-set:numeric char)) + (and (fix:<= (char->integer #\0) char) + (fix:<= char (char->integer #\9)))) (define-integrable (char-graphic? char) (char-set-member? char-set:graphic char)) diff --git a/v7/src/runtime/runtime.pkg b/v7/src/runtime/runtime.pkg index 6ec239048..95c884e63 100644 --- a/v7/src/runtime/runtime.pkg +++ b/v7/src/runtime/runtime.pkg @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: runtime.pkg,v 14.368 2001/06/09 00:34:58 cph Exp $ +$Id: runtime.pkg,v 14.369 2001/06/15 20:38:40 cph Exp $ Copyright (c) 1988-2001 Massachusetts Institute of Technology @@ -345,6 +345,8 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA chars->char-set predicate->char-set string->char-set) + (export (runtime string) + char-set-table) (initialization (initialize-package!))) (define-package (runtime compiler-info) diff --git a/v7/src/runtime/runtime.sf b/v7/src/runtime/runtime.sf index a80d57b0a..2301b39bf 100644 --- a/v7/src/runtime/runtime.sf +++ b/v7/src/runtime/runtime.sf @@ -1,8 +1,8 @@ #| -*-Scheme-*- -$Id: runtime.sf,v 14.13 1999/01/02 06:11:34 cph Exp $ +$Id: runtime.sf,v 14.14 2001/06/15 20:38:43 cph Exp $ -Copyright (c) 1988-1999 Massachusetts Institute of Technology +Copyright (c) 1988-2001 Massachusetts Institute of Technology This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,16 +16,16 @@ General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. |# (fluid-let ((sf/default-syntax-table syntax-table/system-internal)) - (sf-conditionally "char") + (sf-conditionally "chrset") + (sf-conditionally "gentag") (sf-conditionally "graphics") (sf-conditionally "infstr") (sf-conditionally "os2winp") - (sf-conditionally "gentag") - (sf-conditionally "gencache") (sf-directory ".")) ;; Guarantee that the package modeller is loaded. load-option ensures diff --git a/v7/src/runtime/string.scm b/v7/src/runtime/string.scm index d41630c40..f9f32d9c5 100644 --- a/v7/src/runtime/string.scm +++ b/v7/src/runtime/string.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: string.scm,v 14.42 2001/03/21 05:41:41 cph Exp $ +$Id: string.scm,v 14.43 2001/06/15 20:38:46 cph Exp $ Copyright (c) 1988-2001 Massachusetts Institute of Technology @@ -34,7 +34,8 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ;; (prefixed with `%') that assumes all arguments have been checked. ;; This avoids repeated argument checks. -(declare (usual-integrations)) +(declare (usual-integrations) + (integrate-external "chrset")) ;;;; Primitives @@ -44,8 +45,6 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA string-maximum-length set-string-maximum-length! substring=? substring-ci=? substringsymbol string) - ;; This prevents the symbol from being affected if the string - ;; is mutated. The string is copied only if the symbol is - ;; created. + ;; Calling STRING-COPY prevents the symbol from being affected if + ;; the string is mutated. The string is copied only if the symbol + ;; is created. (or ((ucode-primitive find-symbol) string) ((ucode-primitive string->symbol) (string-copy string)))) -(define-integrable (intern string) - ((ucode-primitive string->symbol) (string-downcase string))) +(define (intern string) + (if (string-lower-case? string) + (string->symbol string) + ((ucode-primitive string->symbol) (string-downcase string)))) -(define-integrable (intern-soft string) - ((ucode-primitive find-symbol) (string-downcase string))) +(define (intern-soft string) + ((ucode-primitive find-symbol) + (if (string-lower-case? string) + string + (string-downcase string)))) (define (symbol-name symbol) (if (not (symbol? symbol)) diff --git a/v7/src/runtime/unpars.scm b/v7/src/runtime/unpars.scm index a2c2e760b..df1966b72 100644 --- a/v7/src/runtime/unpars.scm +++ b/v7/src/runtime/unpars.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: unpars.scm,v 14.47 2001/03/21 19:15:26 cph Exp $ +$Id: unpars.scm,v 14.48 2001/06/15 20:38:51 cph Exp $ Copyright (c) 1988-2001 Massachusetts Institute of Technology @@ -340,53 +340,50 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA (*unparse-char character))) (define (unparse/string string) - (cond ((char-set? string) - (*unparse-with-brackets 'CHARACTER-SET string false)) - ((not *slashify?*) - (*unparse-string string)) - (else - (let ((end (string-length string))) - (let ((end* - (if *unparser-string-length-limit* - (min *unparser-string-length-limit* end) - end))) - (*unparse-char #\") - (if (substring-find-next-char-in-set string 0 end* - string-delimiters) - (let loop ((start 0)) - (let ((index - (substring-find-next-char-in-set string start end* - string-delimiters))) - (if index - (begin - (*unparse-substring string start index) - (*unparse-char #\\) - (let ((char (string-ref string index))) - (cond ((char=? char char:newline) - (*unparse-char #\n)) - ((char=? char #\Tab) - (*unparse-char #\t)) - ((char=? char #\VT) - (*unparse-char #\v)) - ((char=? char #\BS) - (*unparse-char #\b)) - ((char=? char #\Return) - (*unparse-char #\r)) - ((char=? char #\Page) - (*unparse-char #\f)) - ((char=? char #\BEL) - (*unparse-char #\a)) - ((or (char=? char #\\) - (char=? char #\")) - (*unparse-char char)) - (else - (*unparse-string (char->octal char))))) - (loop (+ index 1))) - (*unparse-substring string start end*)))) - (*unparse-substring string 0 end*)) - (if (< end* end) - (*unparse-string "...")) - (*unparse-char #\")))))) + (if *slashify?* + (let ((end (string-length string))) + (let ((end* + (if *unparser-string-length-limit* + (min *unparser-string-length-limit* end) + end))) + (*unparse-char #\") + (if (substring-find-next-char-in-set string 0 end* + string-delimiters) + (let loop ((start 0)) + (let ((index + (substring-find-next-char-in-set string start end* + string-delimiters))) + (if index + (begin + (*unparse-substring string start index) + (*unparse-char #\\) + (let ((char (string-ref string index))) + (cond ((char=? char char:newline) + (*unparse-char #\n)) + ((char=? char #\Tab) + (*unparse-char #\t)) + ((char=? char #\VT) + (*unparse-char #\v)) + ((char=? char #\BS) + (*unparse-char #\b)) + ((char=? char #\Return) + (*unparse-char #\r)) + ((char=? char #\Page) + (*unparse-char #\f)) + ((char=? char #\BEL) + (*unparse-char #\a)) + ((or (char=? char #\\) + (char=? char #\")) + (*unparse-char char)) + (else + (*unparse-string (char->octal char))))) + (loop (+ index 1))) + (*unparse-substring string start end*)))) + (*unparse-substring string 0 end*)) + (if (< end* end) + (*unparse-string "...")) + (*unparse-char #\"))) + (*unparse-string string))) (define (char->octal char) (let ((qr1 (integer-divide (char->ascii char) 8)))