From accd5bae0e74d1b0f39949b4c26bf780a669a68c Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Thu, 13 May 1999 03:04:46 +0000 Subject: [PATCH] Initial revision --- v7/src/runtime/chrsyn.scm | 143 ++++++++++++++++++++++++++++++++++++++ v7/src/runtime/regexp.scm | 125 +++++++++++++++++++++++++++++++++ 2 files changed, 268 insertions(+) create mode 100644 v7/src/runtime/chrsyn.scm create mode 100644 v7/src/runtime/regexp.scm diff --git a/v7/src/runtime/chrsyn.scm b/v7/src/runtime/chrsyn.scm new file mode 100644 index 000000000..789f61774 --- /dev/null +++ b/v7/src/runtime/chrsyn.scm @@ -0,0 +1,143 @@ +;;; -*-Scheme-*- +;;; +;;; $Id: chrsyn.scm,v 1.1 1999/05/13 03:04:36 cph Exp $ +;;; +;;; Copyright (c) 1986, 1989-1999 Massachusetts Institute of Technology +;;; +;;; This program is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation; either version 2 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +;;;; Character-Syntax Tables +;;; package: (runtime char-syntax) + +(declare (usual-integrations)) + +(define-structure (char-syntax-table (constructor %make-char-syntax-table) + (conc-name char-syntax-table/)) + (entries #f read-only #t)) + +(define (guarantee-char-syntax-table table procedure) + (if (not (char-syntax-table? table)) + (error:wrong-type-argument table "char-syntax table" procedure)) + (char-syntax-table/entries table)) + +(define (make-char-syntax-table #!optional table) + (%make-char-syntax-table + (vector-copy + (if (or (default-object? table) (not table)) + (char-syntax-table/entries standard-char-syntax-table) + (guarantee-char-syntax-table table 'MAKE-CHAR-SYNTAX-TABLE))))) + +(define (get-char-syntax table char) + (if (not (char? char)) + (error:wrong-type-argument char "character" 'GET-CHAR-SYNTAX)) + (vector-ref (guarantee-char-syntax-table table 'GET-CHAR-SYNTAX) + (char->ascii char))) + +(define (set-char-syntax! table char string) + (let ((entries (guarantee-char-syntax-table table 'SET-CHAR-SYNTAX!)) + (entry (string->char-syntax string))) + (cond ((char? char) + (vector-set! entries (char->ascii char) entry)) + ((char-set? char) + (for-each (lambda (ascii) (vector-set! entries ascii entry)) + (char-set-members char))) + (else + (error:wrong-type-argument char "character" 'SET-CHAR-SYNTAX!))))) + +(define standard-char-syntax-table) + +(define (initialize-package!) + (let ((table + (%make-char-syntax-table + (make-vector 256 (string->char-syntax ""))))) + (set-char-syntax! table char-set:alphanumeric "w") + (set-char-syntax! table #\$ "w") + (set-char-syntax! table #\% "w") + (set-char-syntax! table #\( "()") + (set-char-syntax! table #\) ")(") + (set-char-syntax! table #\[ "(]") + (set-char-syntax! table #\] ")[") + (set-char-syntax! table #\{ "(}") + (set-char-syntax! table #\} "){") + (set-char-syntax! table #\" "\"") + (set-char-syntax! table #\\ "\\") + (set-char-syntax! table (string->char-set "_-+*/&|<>=") "_") + (set-char-syntax! table (string->char-set ".,;:?!#@~^'`") ".") + (set! standard-char-syntax-table table) + unspecific)) + +(define-primitives + (string->char-syntax string->syntax-entry)) + +(define (char-syntax->string entry) + (guarantee-char-syntax entry 'CHAR-SYNTAX->STRING) + (let ((code (fix:and #xf entry))) + (string-append + (vector-ref char-syntax-codes code) + (let ((match (fix:and #xff (fix:lsh entry -4)))) + (if (zero? match) + " " + (string (ascii->char match)))) + (let ((cbits (fix:and #xFF (fix:lsh entry -12)))) + (string-append + (if (fix:= 0 (fix:and #x40 cbits)) "" "1") + (if (fix:= 0 (fix:and #x10 cbits)) "" "2") + (if (fix:= 0 (fix:and #x04 cbits)) "" "3") + (if (fix:= 0 (fix:and #x01 cbits)) "" "4") + (if (or (fix:= 0 (fix:and #x80 cbits)) + (and (fix:= code 11) + (fix:= #x80 (fix:and #xC0 cbits)))) + "" + "5") + (if (fix:= 0 (fix:and #x20 cbits)) "" "6") + (if (or (fix:= 0 (fix:and #x08 cbits)) + (and (fix:= code 12) + (fix:= #x08 (fix:and #x0C cbits)))) + "" + "7") + (if (fix:= 0 (fix:and #x02 cbits)) "" "8"))) + (if (fix:= 0 (fix:and #x100000 entry)) "" "p")))) + +(define (guarantee-char-syntax object procedure) + (if (not (index-fixnum? object)) + (error:wrong-type-argument object "non-negative fixnum" procedure)) + (if (not (and (fix:< object #x200000) + (fix:<= (fix:and #xf object) 12))) + (error:bad-range-argument object procedure))) + +(define char-syntax-codes + '#(" " "." "w" "_" "(" ")" "'" "\"" "$" "\\" "/" "<" ">")) + +(define (substring-find-next-char-of-syntax string start end table code) + (guarantee-substring string start end 'SUBSTRING-FIND-NEXT-CHAR-OF-SYNTAX) + (let loop ((index start)) + (and (fix:< index end) + (if (char=? code (char->syntax-code table (string-ref string index))) + index + (loop (fix:+ index 1)))))) + +(define (substring-find-next-char-not-of-syntax string start end table code) + (guarantee-substring string start end + 'SUBSTRING-FIND-NEXT-CHAR-NOT-OF-SYNTAX) + (let loop ((index start)) + (and (fix:< index end) + (if (char=? code (char->syntax-code table (string-ref string index))) + (loop (fix:+ index 1)) + index)))) + +(define (char->syntax-code table char) + (string-ref (vector-ref char-syntax-codes + (fix:and #xf (get-char-syntax table char))) + 0)) \ No newline at end of file diff --git a/v7/src/runtime/regexp.scm b/v7/src/runtime/regexp.scm new file mode 100644 index 000000000..73adb6cab --- /dev/null +++ b/v7/src/runtime/regexp.scm @@ -0,0 +1,125 @@ +;;; -*-Scheme-*- +;;; +;;; $Id: regexp.scm,v 1.1 1999/05/13 03:04:46 cph Exp $ +;;; +;;; Copyright (c) 1986, 1989-1999 Massachusetts Institute of Technology +;;; +;;; This program is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation; either version 2 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +;;;; Regular Expressions +;;; package: (runtime regular-expression) + +(declare (usual-integrations)) + +(define registers) + +(define (initialize-package!) + (set! registers (make-vector 20 #f)) + unspecific) + +(define (re-match-start-index i) + (guarantee-re-register i 'RE-MATCH-START-INDEX) + (vector-ref registers i)) + +(define (re-match-end-index i) + (guarantee-re-register i 'RE-MATCH-END-INDEX) + (vector-ref registers (fix:+ i 10))) + +(define (guarantee-re-register i operator) + (if (not (and (exact-nonnegative-integer? i) (< i 10))) + (error:wrong-type-argument i "regular-expression register" operator))) + +(define (re-registers) + (vector-copy registers)) + +(define (set-re-registers! registers*) + (guarantee-re-registers registers* 'SET-RE-REGISTERS!) + (do ((i 0 (fix:+ i 1))) + ((fix:= 20 i)) + (vector-set! registers i (vector-ref registers* i)))) + +(define (guarantee-re-registers object procedure) + (if (not (re-registers? object)) + (error:wrong-type-argument object "regular-expression registers" + procedure))) + +(define (re-registers? object) + (and (vector? object) + (fix:= 20 (vector-length object)) + (let loop ((i 0)) + (or (fix:= 20 i) + (and (or (index-fixnum? (vector-ref object i)) + (not (vector-ref object i))) + (loop (fix:+ i 1))))))) + +(define (preserving-re-registers thunk) + (let ((registers* unspecific)) + (dynamic-wind (lambda () (set! registers* (re-registers)) unspecific) + thunk + (lambda () (set-re-registers! registers*))))) + +(define (regexp-group . alternatives) + (let ((alternatives + (list-transform-positive alternatives identity-procedure))) + (if (null? alternatives) + "\\(\\)" + (apply string-append + (cons "\\(" + (let loop ((alternatives alternatives)) + (cons (car alternatives) + (if (null? (cdr alternatives)) + (list "\\)") + (cons "\\|" (loop (cdr alternatives))))))))))) + +(define (make-substring-operation primitive) + (lambda (regexp string start end #!optional case-fold? syntax-table) + (let ((regexp + (if (compiled-regexp? regexp) + regexp + (re-compile-pattern regexp + (if (default-object? case-fold?) + #f + case-fold?))))) + (primitive (compiled-regexp/byte-stream regexp) + (compiled-regexp/translation-table regexp) + (char-syntax-table/entries + (if (or (default-object? syntax-table) (not syntax-table)) + standard-char-syntax-table + syntax-table)) + registers string start end)))) + +(define re-substring-match + (make-substring-operation (ucode-primitive re-match-substring))) + +(define re-substring-search-forward + (make-substring-operation (ucode-primitive re-search-substring-forward))) + +(define re-substring-search-backward + (make-substring-operation (ucode-primitive re-search-substring-backward))) + +(define (make-string-operation substring-operation) + (lambda (regexp string #!optional case-fold? syntax-table) + (substring-operation regexp string 0 (string-length string) + (if (default-object? case-fold?) #f case-fold?) + (if (default-object? syntax-table) #f syntax-table)))) + +(define re-string-match + (make-string-operation re-substring-match)) + +(define re-string-search-forward + (make-string-operation re-substring-search-forward)) + +(define re-string-search-backward + (make-string-operation re-substring-search-backward)) \ No newline at end of file -- 2.25.1