Move unicode support into runtime system.
authorChris Hanson <org/chris-hanson/cph>
Sun, 11 Nov 2001 06:02:52 +0000 (06:02 +0000)
committerChris Hanson <org/chris-hanson/cph>
Sun, 11 Nov 2001 06:02:52 +0000 (06:02 +0000)
v7/src/runtime/ed-ffi.scm
v7/src/runtime/runtime.pkg
v7/src/runtime/unicode.scm [moved from v7/src/star-parser/unicode.scm with 99% similarity]
v7/src/star-parser/buffer.scm [deleted file]
v7/src/star-parser/compile.scm
v7/src/star-parser/ed-ffi.scm
v7/src/star-parser/parser.pkg

index 1e31dda8c6c53ef0d31831348f9243421b5e4556..7cd66d0cf40a308da0fee18cc0c06a897a098717 100644 (file)
@@ -1,6 +1,6 @@
 #| -*- Scheme -*-
 
-$Id: ed-ffi.scm,v 1.29 2001/11/11 05:52:30 cph Exp $
+$Id: ed-ffi.scm,v 1.30 2001/11/11 05:58:39 cph Exp $
 
 Copyright (c) 1988-2001 Massachusetts Institute of Technology
 
@@ -286,6 +286,8 @@ USA.
                syntax-table/system-internal)
     ("uerror"  (runtime microcode-errors)
                syntax-table/system-internal)
+    ("unicode" (runtime unicode)
+               syntax-table/system-internal)
     ("unpars"  (runtime unparser)
                syntax-table/system-internal)
     ("unsyn"   (runtime unsyntaxer)
index 25db79b6688ea4232b45fb01e16c05ab7a848049..f3719896bcfabfb1942c6a58acf57e27ff6e469e 100644 (file)
@@ -1,6 +1,6 @@
 #| -*-Scheme-*-
 
-$Id: runtime.pkg,v 14.382 2001/11/11 05:51:51 cph Exp $
+$Id: runtime.pkg,v 14.383 2001/11/11 05:58:56 cph Exp $
 
 Copyright (c) 1988-2001 Massachusetts Institute of Technology
 
@@ -3798,4 +3798,28 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
          set-parser-buffer-pointer!
          source->parser-buffer
          string->parser-buffer
-         substring->parser-buffer))
\ No newline at end of file
+         substring->parser-buffer))
+
+(define-package (runtime unicode)
+  (files "unicode")
+  (parent ())
+  (export ()
+         8-bit-alphabet?
+         alphabet+
+         alphabet-
+         alphabet->char-set
+         alphabet->code-points
+         alphabet->string
+         alphabet?
+         char-in-alphabet?
+         char-set->alphabet
+         code-point->utf8-string
+         code-point-in-alphabet?
+         code-points->alphabet
+         read-utf8-code-point
+         read-utf8-code-point-from-source
+         string->alphabet
+         unicode-code-point?
+         utf8-string->code-point
+         well-formed-code-points-list?
+         write-utf8-code-point))
\ No newline at end of file
similarity index 99%
rename from v7/src/star-parser/unicode.scm
rename to v7/src/runtime/unicode.scm
index 75e62208e74197f930d11f99d399bce217e96027..73cf601ea8110cece518794e496e31562cc68257 100644 (file)
@@ -1,6 +1,6 @@
 ;;; -*-Scheme-*-
 ;;;
-;;; $Id: unicode.scm,v 1.6 2001/10/04 16:59:18 cph Exp $
+;;; $Id: unicode.scm,v 1.1 2001/11/11 05:58:04 cph Exp $
 ;;;
 ;;; Copyright (c) 2001 Massachusetts Institute of Technology
 ;;;
diff --git a/v7/src/star-parser/buffer.scm b/v7/src/star-parser/buffer.scm
deleted file mode 100644 (file)
index d9d53b7..0000000
+++ /dev/null
@@ -1,310 +0,0 @@
-;;; -*-Scheme-*-
-;;;
-;;; $Id: buffer.scm,v 1.11 2001/10/04 16:59:56 cph Exp $
-;;;
-;;; Copyright (c) 2001 Massachusetts Institute of Technology
-;;;
-;;; This program is free software; you can redistribute it and/or
-;;; modify it under the terms of the GNU General Public License as
-;;; published by the Free Software Foundation; either version 2 of the
-;;; License, or (at your option) any later version.
-;;;
-;;; This program is distributed in the hope that it will be useful,
-;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-;;; General Public License for more details.
-;;;
-;;; You should have received a copy of the GNU General Public License
-;;; along with this program; if not, write to the Free Software
-;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-;;; 02111-1307, USA.
-
-;;;; Parser-buffer abstraction
-
-(declare (usual-integrations))
-\f
-;;;; Parser buffer abstraction
-
-(define-structure parser-buffer
-  ;; The string buffer, as a substring:
-  string
-  start
-  end
-  ;; The offset of the string buffer within the character stream.
-  ;; This is always zero if SOURCE is #F.
-  base-offset
-  ;; Our current position in the buffer.
-  index
-  ;; A procedure that is used to replenish the buffer when the
-  ;; buffered characters are used up.  The procedure takes three
-  ;; arguments, (STRING START END), and attempts to fill the
-  ;; corresponding substring, returning the number of characters
-  ;; actually written.  If SOURCE is #F, the buffered characters are
-  ;; the entire stream.
-  source
-  ;; True if there are no more characters past END.
-  at-end?
-  ;; The number of newlines to the left of the current position.
-  line)
-
-;;; The two basic kinds of buffers: substring and source.  A substring
-;;; buffer is one that reads from a pre-filled substring.  A source
-;;; buffer is one that reads from an unbuffered source of unbounded
-;;; length.
-
-(define (substring->parser-buffer string start end)
-  (make-parser-buffer string start end 0 start #f #t 0))
-
-(define (source->parser-buffer source)
-  (make-parser-buffer (make-string min-length) 0 0 0 0 source #f 0))
-
-(define-integrable min-length 256)
-
-(define (string->parser-buffer string)
-  (substring->parser-buffer string 0 (string-length string)))
-
-(define (input-port->parser-buffer port)
-  (source->parser-buffer
-   (lambda (string start end)
-     (read-substring! string start end port))))
-
-(define-structure parser-buffer-pointer
-  (index #f read-only #t)
-  (line #f read-only #t))
-\f
-(define (get-parser-buffer-pointer buffer)
-  ;; Get an object that represents the current position.
-  (make-parser-buffer-pointer (+ (parser-buffer-base-offset buffer)
-                                (parser-buffer-index buffer))
-                             (parser-buffer-line buffer)))
-
-(define (set-parser-buffer-pointer! buffer p)
-  ;; Move the current position to P, which must be an object that was
-  ;; previously returned by GET-PARSER-BUFFER-POINTER.  The position
-  ;; may only be moved to the left.
-  (set-parser-buffer-index! buffer (pointer->index p buffer))
-  (set-parser-buffer-line! buffer (parser-buffer-pointer-line p)))
-
-(define (get-parser-buffer-tail buffer p)
-  ;; P must be a buffer pointer previously returned by
-  ;; GET-PARSER-BUFFER-POINTER.  Return the string of characters
-  ;; between P and the current buffer pointer.
-  (substring (parser-buffer-string buffer)
-            (pointer->index p buffer)
-            (parser-buffer-index buffer)))
-
-(define (pointer->index p buffer)
-  (if (parser-buffer-pointer? p)
-      (let ((p*
-            (- (parser-buffer-pointer-index p)
-               (parser-buffer-base-offset buffer))))
-       (if (<= (parser-buffer-start buffer) p* (parser-buffer-index buffer))
-           p*
-           (error:bad-range-argument p 'POINTER->INDEX)))
-      (error:wrong-type-argument p "parser-buffer pointer" 'POINTER->INDEX)))
-
-(define (parser-buffer-position-string object)
-  (let ((pointer
-        (if (parser-buffer-pointer? object)
-            object
-            (get-parser-buffer-pointer object))))
-    (string-append
-     "line "
-     (number->string (+ (parser-buffer-pointer-line pointer) 1))
-     ", char "
-     (number->string (+ (parser-buffer-pointer-index pointer) 1)))))
-
-(define (read-parser-buffer-char buffer)
-  ;; Attempt to read the next character from BUFFER, starting at the
-  ;; current position.  If there is a character available, increment
-  ;; the position and return the character.  If there are no more
-  ;; characters available, return #F and leave the position unchanged.
-  (and (guarantee-buffer-chars buffer 1)
-       (let ((char
-             (string-ref (parser-buffer-string buffer)
-                         (parser-buffer-index buffer))))
-        (increment-buffer-index! buffer char)
-        char)))
-
-(define (peek-parser-buffer-char buffer)
-  ;; Attempt to read the next character from BUFFER, starting at the
-  ;; current position.  If there is a character available, return it,
-  ;; otherwise return #F.  The position is unaffected in either case.
-  (and (guarantee-buffer-chars buffer 1)
-       (string-ref (parser-buffer-string buffer)
-                  (parser-buffer-index buffer))))
-
-(define (parser-buffer-ref buffer index)
-  (if (not (index-fixnum? index))
-      (error:wrong-type-argument index "index" 'PARSER-BUFFER-REF))
-  (and (guarantee-buffer-chars buffer (fix:+ index 1))
-       (string-ref (parser-buffer-string buffer)
-                  (fix:+ (parser-buffer-index buffer) index))))
-\f
-(let-syntax
-    ((char-matcher
-      (lambda (name test)
-       `(BEGIN
-          (DEFINE (,(symbol-append 'MATCH-PARSER-BUFFER- name '-NO-ADVANCE)
-                   BUFFER REFERENCE)
-            (AND (GUARANTEE-BUFFER-CHARS BUFFER 1)
-                 (LET ((CHAR
-                        (STRING-REF (PARSER-BUFFER-STRING BUFFER)
-                                    (PARSER-BUFFER-INDEX BUFFER))))
-                   (DECLARE (INTEGRATE CHAR))
-                   ,test)))
-          (DEFINE (,(symbol-append 'MATCH-PARSER-BUFFER- name)
-                   BUFFER REFERENCE)
-            (AND (GUARANTEE-BUFFER-CHARS BUFFER 1)
-                 (LET ((CHAR
-                        (STRING-REF (PARSER-BUFFER-STRING BUFFER)
-                                    (PARSER-BUFFER-INDEX BUFFER))))
-                   (AND ,test
-                        (BEGIN
-                          (INCREMENT-BUFFER-INDEX! BUFFER CHAR)
-                          #T)))))))))
-  (char-matcher char (char=? char reference))
-  (char-matcher char-ci (char-ci=? char reference))
-  (char-matcher not-char (not (char=? char reference)))
-  (char-matcher not-char-ci (not (char-ci=? char reference)))
-  (char-matcher char-in-set (char-set-member? reference char)))
-
-(define (match-utf8-char-in-alphabet buffer alphabet)
-  (let ((p (get-parser-buffer-pointer buffer)))
-    (if (let ((n
-              (read-utf8-code-point-from-source
-               (lambda ()
-                 (read-parser-buffer-char buffer)))))
-         (and n
-              (code-point-in-alphabet? n alphabet)))
-       #t
-       (begin
-         (set-parser-buffer-pointer! buffer p)
-         #f))))
-\f
-(let-syntax
-    ((string-matcher
-      (lambda (suffix)
-       (let ((name
-              (intern (string-append "match-parser-buffer-string" suffix)))
-             (match-substring
-              (intern
-               (string-append "match-parser-buffer-substring" suffix))))
-         `(DEFINE (,name BUFFER STRING)
-            (,match-substring BUFFER STRING 0 (STRING-LENGTH STRING)))))))
-  (string-matcher "")
-  (string-matcher "-ci")
-  (string-matcher "-no-advance")
-  (string-matcher "-ci-no-advance"))
-
-(let-syntax
-    ((substring-matcher
-      (lambda (suffix)
-       `(DEFINE (,(intern
-                   (string-append "match-parser-buffer-substring" suffix))
-                 BUFFER STRING START END)
-          (LET ((N (FIX:- END START)))
-            (AND (GUARANTEE-BUFFER-CHARS BUFFER N)
-                 (,(intern (string-append "substring" suffix "=?"))
-                  STRING START END
-                  (PARSER-BUFFER-STRING BUFFER)
-                  (PARSER-BUFFER-INDEX BUFFER)
-                  (FIX:+ (PARSER-BUFFER-INDEX BUFFER) N))
-                 (BEGIN
-                   (BUFFER-INDEX+N! BUFFER N)
-                   #T)))))))
-  (substring-matcher "")
-  (substring-matcher "-ci"))
-
-(let-syntax
-    ((substring-matcher
-      (lambda (suffix)
-       `(DEFINE (,(intern
-                   (string-append "match-parser-buffer-substring"
-                                  suffix
-                                  "-no-advance"))
-                 BUFFER STRING START END)
-          (LET ((N (FIX:- END START)))
-            (AND (GUARANTEE-BUFFER-CHARS BUFFER N)
-                 (,(intern (string-append "substring" suffix "=?"))
-                  STRING START END
-                  (PARSER-BUFFER-STRING BUFFER)
-                  (PARSER-BUFFER-INDEX BUFFER)
-                  (FIX:+ (PARSER-BUFFER-INDEX BUFFER) N))))))))
-  (substring-matcher "")
-  (substring-matcher "-ci"))
-\f
-(define-integrable (increment-buffer-index! buffer char)
-  (set-parser-buffer-index! buffer (fix:+ (parser-buffer-index buffer) 1))
-  (if (char=? char #\newline)
-      (set-parser-buffer-line! buffer (fix:+ (parser-buffer-line buffer) 1))))
-
-(define (buffer-index+n! buffer n)
-  (let ((i (parser-buffer-index buffer))
-       (s (parser-buffer-string buffer)))
-    (let ((j (fix:+ i n)))
-      (do ((i i (fix:+ i 1)))
-         ((fix:= i j))
-       (if (char=? (string-ref s i) #\newline)
-           (set-parser-buffer-line! buffer
-                                    (fix:+ (parser-buffer-line buffer) 1))))
-      (set-parser-buffer-index! buffer j))))
-
-(define-integrable (guarantee-buffer-chars buffer n)
-  (or (fix:<= (fix:+ (parser-buffer-index buffer) n)
-             (parser-buffer-end buffer))
-      (guarantee-buffer-chars-1 buffer n)))
-
-(define (guarantee-buffer-chars-1 buffer n)
-  (let ((min-end (fix:+ (parser-buffer-index buffer) n))
-       (end (parser-buffer-end buffer)))
-    (and (not (parser-buffer-at-end? buffer))
-        (begin
-          (let* ((string (parser-buffer-string buffer))
-                 (max-end (string-length string))
-                 (max-end*
-                  (let loop ((max-end* max-end))
-                    (if (fix:<= min-end max-end*)
-                        max-end*
-                        (loop (fix:* max-end* 2))))))
-            (if (fix:> max-end* max-end)
-                (let ((string* (make-string max-end*)))
-                  (string-move! string string* 0)
-                  (set-parser-buffer-string! buffer string*))))
-          (let ((n-read
-                 (let ((string (parser-buffer-string buffer)))
-                   ((parser-buffer-source buffer)
-                    string end (string-length string)))))
-            (if (fix:> n-read 0)
-                (let ((end (fix:+ end n-read)))
-                  (set-parser-buffer-end! buffer end)
-                  (fix:<= min-end end))
-                (begin
-                  (set-parser-buffer-at-end?! buffer #t)
-                  #f)))))))
-
-(define (discard-parser-buffer-head! buffer)
-  ;; Tell the buffer that it is safe to discard all characters to the
-  ;; left of the current position.
-  (if (parser-buffer-source buffer)
-      (let ((string (parser-buffer-string buffer))
-           (index (parser-buffer-index buffer))
-           (end (parser-buffer-end buffer)))
-       (if (fix:< 0 index)
-           (let* ((end* (fix:- end index))
-                  (string*
-                   (let ((n (string-length string)))
-                     (if (and (fix:> n min-length)
-                              (fix:<= end* (fix:quotient n 4)))
-                         (make-string (fix:quotient n 2))
-                         string))))
-             (without-interrupts
-              (lambda ()
-                (substring-move! string index end string* 0)
-                (set-parser-buffer-string! buffer string*)
-                (set-parser-buffer-index! buffer 0)
-                (set-parser-buffer-end! buffer end*)
-                (set-parser-buffer-base-offset!
-                 buffer
-                 (+ (parser-buffer-base-offset buffer) index)))))))
-      (set-parser-buffer-start! buffer (parser-buffer-index buffer))))
\ No newline at end of file
index 781d9f630c59e40686df0a23b93a4764c028431b..82e0c4045d481b20862f07bb8c1e4807773f1760 100644 (file)
@@ -1,6 +1,6 @@
 ;;; -*-Scheme-*-
 ;;;
-;;; $Id: compile.scm,v 1.3 2001/10/01 05:20:36 cph Exp $
+;;; $Id: compile.scm,v 1.4 2001/11/11 06:00:08 cph Exp $
 ;;;
 ;;; Copyright (c) 2001 Massachusetts Institute of Technology
 ;;;
 (with-working-directory-pathname (directory-pathname (current-load-pathname))
   (lambda ()
     (for-each compile-file
-             '("buffer"
-               "matcher"
+             '("matcher"
                "parser"
                "shared"
-               "synchk"
-               "unicode"))
+               "synchk"))
     (cref/generate-constructors "parser")))
\ No newline at end of file
index 2f32e69043793c20fd49564b86c9cda4349f002f..d76d8b257b0ce0e6e9d6fb5c985587d9b65156cd 100644 (file)
@@ -1,6 +1,6 @@
 ;;; -*-Scheme-*-
 ;;;
-;;; $Id: ed-ffi.scm,v 1.3 2001/07/11 22:09:50 cph Exp $
+;;; $Id: ed-ffi.scm,v 1.4 2001/11/11 06:00:26 cph Exp $
 ;;;
 ;;; Copyright (c) 2001 Massachusetts Institute of Technology
 ;;;
 ;;;; Parser language: Edwin buffer packaging info
 
 (standard-scheme-find-file-initialization
- '#(("buffer"  (runtime *parser buffer)
-               system-global-syntax-table)
-    ("matcher" (runtime *parser)
+ '#(("matcher" (runtime *parser)
                system-global-syntax-table)
     ("parser"  (runtime *parser)
                system-global-syntax-table)
     ("shared"  (runtime *parser)
                system-global-syntax-table)
     ("synchk"  (runtime *parser)
-               system-global-syntax-table)
-    ("unicode"  (runtime unicode)
                system-global-syntax-table)))
\ No newline at end of file
index d746e4e63721da9df16e2a37cb8b1a6789308e00..35a2bd24c1e7faf037cddddf0ac1818bc1bd6bda 100644 (file)
@@ -1,6 +1,6 @@
 ;;; -*-Scheme-*-
 ;;;
-;;; $Id: parser.pkg,v 1.14 2001/10/04 16:51:20 cph Exp $
+;;; $Id: parser.pkg,v 1.15 2001/11/11 05:59:19 cph Exp $
 ;;;
 ;;; Copyright (c) 2001 Massachusetts Institute of Technology
 ;;;
 
 (global-definitions "../runtime/runtime")
 
-(define-package (runtime *parser buffer)
-  (files "buffer")
-  (parent ())
-  (export ()
-         discard-parser-buffer-head!
-         get-parser-buffer-pointer
-         get-parser-buffer-tail
-         input-port->parser-buffer
-         match-parser-buffer-char
-         match-parser-buffer-char-ci
-         match-parser-buffer-char-ci-no-advance
-         match-parser-buffer-char-in-set
-         match-parser-buffer-char-in-set-no-advance
-         match-parser-buffer-char-no-advance
-         match-parser-buffer-not-char
-         match-parser-buffer-not-char-ci
-         match-parser-buffer-not-char-ci-no-advance
-         match-parser-buffer-not-char-no-advance
-         match-parser-buffer-string
-         match-parser-buffer-string-ci
-         match-parser-buffer-string-ci-no-advance
-         match-parser-buffer-string-no-advance
-         match-parser-buffer-substring
-         match-parser-buffer-substring-ci
-         match-parser-buffer-substring-ci-no-advance
-         match-parser-buffer-substring-no-advance
-         match-utf8-char-in-alphabet
-         parser-buffer-line
-         parser-buffer-pointer-index
-         parser-buffer-pointer-line
-         parser-buffer-pointer?
-         parser-buffer-position-string
-         parser-buffer-ref
-         parser-buffer?
-         peek-parser-buffer-char
-         read-parser-buffer-char
-         set-parser-buffer-pointer!
-         source->parser-buffer
-         string->parser-buffer
-         substring->parser-buffer))
-
 (define-package (runtime *parser)
   (files "synchk" "shared" "matcher" "parser")
   (parent ())
          make-parser-macros
          parser-macros?
          set-current-parser-macros!
-         with-current-parser-macros))
-
-(define-package (runtime unicode)
-  (files "unicode")
-  (parent ())
-  (export ()
-         8-bit-alphabet?
-         alphabet+
-         alphabet-
-         alphabet->char-set
-         alphabet->code-points
-         alphabet->string
-         alphabet?
-         char-in-alphabet?
-         char-set->alphabet
-         code-point->utf8-string
-         code-point-in-alphabet?
-         code-points->alphabet
-         read-utf8-code-point
-         read-utf8-code-point-from-source
-         string->alphabet
-         unicode-code-point?
-         utf8-string->code-point
-         well-formed-code-points-list?
-         write-utf8-code-point))
\ No newline at end of file
+         with-current-parser-macros))
\ No newline at end of file