From 3bbfbfe9bcdea4ab92e7f0c7d7a9c247999b6864 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Sun, 11 Nov 2001 06:02:52 +0000 Subject: [PATCH] Move unicode support into runtime system. --- v7/src/runtime/ed-ffi.scm | 4 +- v7/src/runtime/runtime.pkg | 28 +- v7/src/{star-parser => runtime}/unicode.scm | 2 +- v7/src/star-parser/buffer.scm | 310 -------------------- v7/src/star-parser/compile.scm | 8 +- v7/src/star-parser/ed-ffi.scm | 8 +- v7/src/star-parser/parser.pkg | 69 +---- 7 files changed, 37 insertions(+), 392 deletions(-) rename v7/src/{star-parser => runtime}/unicode.scm (99%) delete mode 100644 v7/src/star-parser/buffer.scm diff --git a/v7/src/runtime/ed-ffi.scm b/v7/src/runtime/ed-ffi.scm index 1e31dda8c..7cd66d0cf 100644 --- a/v7/src/runtime/ed-ffi.scm +++ b/v7/src/runtime/ed-ffi.scm @@ -1,6 +1,6 @@ #| -*- Scheme -*- -$Id: ed-ffi.scm,v 1.29 2001/11/11 05:52:30 cph Exp $ +$Id: ed-ffi.scm,v 1.30 2001/11/11 05:58:39 cph Exp $ Copyright (c) 1988-2001 Massachusetts Institute of Technology @@ -286,6 +286,8 @@ USA. syntax-table/system-internal) ("uerror" (runtime microcode-errors) syntax-table/system-internal) + ("unicode" (runtime unicode) + syntax-table/system-internal) ("unpars" (runtime unparser) syntax-table/system-internal) ("unsyn" (runtime unsyntaxer) diff --git a/v7/src/runtime/runtime.pkg b/v7/src/runtime/runtime.pkg index 25db79b66..f3719896b 100644 --- a/v7/src/runtime/runtime.pkg +++ b/v7/src/runtime/runtime.pkg @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: runtime.pkg,v 14.382 2001/11/11 05:51:51 cph Exp $ +$Id: runtime.pkg,v 14.383 2001/11/11 05:58:56 cph Exp $ Copyright (c) 1988-2001 Massachusetts Institute of Technology @@ -3798,4 +3798,28 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA set-parser-buffer-pointer! source->parser-buffer string->parser-buffer - substring->parser-buffer)) \ No newline at end of file + substring->parser-buffer)) + +(define-package (runtime unicode) + (files "unicode") + (parent ()) + (export () + 8-bit-alphabet? + alphabet+ + alphabet- + alphabet->char-set + alphabet->code-points + alphabet->string + alphabet? + char-in-alphabet? + char-set->alphabet + code-point->utf8-string + code-point-in-alphabet? + code-points->alphabet + read-utf8-code-point + read-utf8-code-point-from-source + string->alphabet + unicode-code-point? + utf8-string->code-point + well-formed-code-points-list? + write-utf8-code-point)) \ No newline at end of file diff --git a/v7/src/star-parser/unicode.scm b/v7/src/runtime/unicode.scm similarity index 99% rename from v7/src/star-parser/unicode.scm rename to v7/src/runtime/unicode.scm index 75e62208e..73cf601ea 100644 --- a/v7/src/star-parser/unicode.scm +++ b/v7/src/runtime/unicode.scm @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: unicode.scm,v 1.6 2001/10/04 16:59:18 cph Exp $ +;;; $Id: unicode.scm,v 1.1 2001/11/11 05:58:04 cph Exp $ ;;; ;;; Copyright (c) 2001 Massachusetts Institute of Technology ;;; diff --git a/v7/src/star-parser/buffer.scm b/v7/src/star-parser/buffer.scm deleted file mode 100644 index d9d53b740..000000000 --- a/v7/src/star-parser/buffer.scm +++ /dev/null @@ -1,310 +0,0 @@ -;;; -*-Scheme-*- -;;; -;;; $Id: buffer.scm,v 1.11 2001/10/04 16:59:56 cph Exp $ -;;; -;;; Copyright (c) 2001 Massachusetts Institute of Technology -;;; -;;; This program is free software; you can redistribute it and/or -;;; modify it under the terms of the GNU General Public License as -;;; published by the Free Software Foundation; either version 2 of the -;;; License, or (at your option) any later version. -;;; -;;; This program is distributed in the hope that it will be useful, -;;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -;;; General Public License for more details. -;;; -;;; You should have received a copy of the GNU General Public License -;;; along with this program; if not, write to the Free Software -;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -;;; 02111-1307, USA. - -;;;; Parser-buffer abstraction - -(declare (usual-integrations)) - -;;;; Parser buffer abstraction - -(define-structure parser-buffer - ;; The string buffer, as a substring: - string - start - end - ;; The offset of the string buffer within the character stream. - ;; This is always zero if SOURCE is #F. - base-offset - ;; Our current position in the buffer. - index - ;; A procedure that is used to replenish the buffer when the - ;; buffered characters are used up. The procedure takes three - ;; arguments, (STRING START END), and attempts to fill the - ;; corresponding substring, returning the number of characters - ;; actually written. If SOURCE is #F, the buffered characters are - ;; the entire stream. - source - ;; True if there are no more characters past END. - at-end? - ;; The number of newlines to the left of the current position. - line) - -;;; The two basic kinds of buffers: substring and source. A substring -;;; buffer is one that reads from a pre-filled substring. A source -;;; buffer is one that reads from an unbuffered source of unbounded -;;; length. - -(define (substring->parser-buffer string start end) - (make-parser-buffer string start end 0 start #f #t 0)) - -(define (source->parser-buffer source) - (make-parser-buffer (make-string min-length) 0 0 0 0 source #f 0)) - -(define-integrable min-length 256) - -(define (string->parser-buffer string) - (substring->parser-buffer string 0 (string-length string))) - -(define (input-port->parser-buffer port) - (source->parser-buffer - (lambda (string start end) - (read-substring! string start end port)))) - -(define-structure parser-buffer-pointer - (index #f read-only #t) - (line #f read-only #t)) - -(define (get-parser-buffer-pointer buffer) - ;; Get an object that represents the current position. - (make-parser-buffer-pointer (+ (parser-buffer-base-offset buffer) - (parser-buffer-index buffer)) - (parser-buffer-line buffer))) - -(define (set-parser-buffer-pointer! buffer p) - ;; Move the current position to P, which must be an object that was - ;; previously returned by GET-PARSER-BUFFER-POINTER. The position - ;; may only be moved to the left. - (set-parser-buffer-index! buffer (pointer->index p buffer)) - (set-parser-buffer-line! buffer (parser-buffer-pointer-line p))) - -(define (get-parser-buffer-tail buffer p) - ;; P must be a buffer pointer previously returned by - ;; GET-PARSER-BUFFER-POINTER. Return the string of characters - ;; between P and the current buffer pointer. - (substring (parser-buffer-string buffer) - (pointer->index p buffer) - (parser-buffer-index buffer))) - -(define (pointer->index p buffer) - (if (parser-buffer-pointer? p) - (let ((p* - (- (parser-buffer-pointer-index p) - (parser-buffer-base-offset buffer)))) - (if (<= (parser-buffer-start buffer) p* (parser-buffer-index buffer)) - p* - (error:bad-range-argument p 'POINTER->INDEX))) - (error:wrong-type-argument p "parser-buffer pointer" 'POINTER->INDEX))) - -(define (parser-buffer-position-string object) - (let ((pointer - (if (parser-buffer-pointer? object) - object - (get-parser-buffer-pointer object)))) - (string-append - "line " - (number->string (+ (parser-buffer-pointer-line pointer) 1)) - ", char " - (number->string (+ (parser-buffer-pointer-index pointer) 1))))) - -(define (read-parser-buffer-char buffer) - ;; Attempt to read the next character from BUFFER, starting at the - ;; current position. If there is a character available, increment - ;; the position and return the character. If there are no more - ;; characters available, return #F and leave the position unchanged. - (and (guarantee-buffer-chars buffer 1) - (let ((char - (string-ref (parser-buffer-string buffer) - (parser-buffer-index buffer)))) - (increment-buffer-index! buffer char) - char))) - -(define (peek-parser-buffer-char buffer) - ;; Attempt to read the next character from BUFFER, starting at the - ;; current position. If there is a character available, return it, - ;; otherwise return #F. The position is unaffected in either case. - (and (guarantee-buffer-chars buffer 1) - (string-ref (parser-buffer-string buffer) - (parser-buffer-index buffer)))) - -(define (parser-buffer-ref buffer index) - (if (not (index-fixnum? index)) - (error:wrong-type-argument index "index" 'PARSER-BUFFER-REF)) - (and (guarantee-buffer-chars buffer (fix:+ index 1)) - (string-ref (parser-buffer-string buffer) - (fix:+ (parser-buffer-index buffer) index)))) - -(let-syntax - ((char-matcher - (lambda (name test) - `(BEGIN - (DEFINE (,(symbol-append 'MATCH-PARSER-BUFFER- name '-NO-ADVANCE) - BUFFER REFERENCE) - (AND (GUARANTEE-BUFFER-CHARS BUFFER 1) - (LET ((CHAR - (STRING-REF (PARSER-BUFFER-STRING BUFFER) - (PARSER-BUFFER-INDEX BUFFER)))) - (DECLARE (INTEGRATE CHAR)) - ,test))) - (DEFINE (,(symbol-append 'MATCH-PARSER-BUFFER- name) - BUFFER REFERENCE) - (AND (GUARANTEE-BUFFER-CHARS BUFFER 1) - (LET ((CHAR - (STRING-REF (PARSER-BUFFER-STRING BUFFER) - (PARSER-BUFFER-INDEX BUFFER)))) - (AND ,test - (BEGIN - (INCREMENT-BUFFER-INDEX! BUFFER CHAR) - #T))))))))) - (char-matcher char (char=? char reference)) - (char-matcher char-ci (char-ci=? char reference)) - (char-matcher not-char (not (char=? char reference))) - (char-matcher not-char-ci (not (char-ci=? char reference))) - (char-matcher char-in-set (char-set-member? reference char))) - -(define (match-utf8-char-in-alphabet buffer alphabet) - (let ((p (get-parser-buffer-pointer buffer))) - (if (let ((n - (read-utf8-code-point-from-source - (lambda () - (read-parser-buffer-char buffer))))) - (and n - (code-point-in-alphabet? n alphabet))) - #t - (begin - (set-parser-buffer-pointer! buffer p) - #f)))) - -(let-syntax - ((string-matcher - (lambda (suffix) - (let ((name - (intern (string-append "match-parser-buffer-string" suffix))) - (match-substring - (intern - (string-append "match-parser-buffer-substring" suffix)))) - `(DEFINE (,name BUFFER STRING) - (,match-substring BUFFER STRING 0 (STRING-LENGTH STRING))))))) - (string-matcher "") - (string-matcher "-ci") - (string-matcher "-no-advance") - (string-matcher "-ci-no-advance")) - -(let-syntax - ((substring-matcher - (lambda (suffix) - `(DEFINE (,(intern - (string-append "match-parser-buffer-substring" suffix)) - BUFFER STRING START END) - (LET ((N (FIX:- END START))) - (AND (GUARANTEE-BUFFER-CHARS BUFFER N) - (,(intern (string-append "substring" suffix "=?")) - STRING START END - (PARSER-BUFFER-STRING BUFFER) - (PARSER-BUFFER-INDEX BUFFER) - (FIX:+ (PARSER-BUFFER-INDEX BUFFER) N)) - (BEGIN - (BUFFER-INDEX+N! BUFFER N) - #T))))))) - (substring-matcher "") - (substring-matcher "-ci")) - -(let-syntax - ((substring-matcher - (lambda (suffix) - `(DEFINE (,(intern - (string-append "match-parser-buffer-substring" - suffix - "-no-advance")) - BUFFER STRING START END) - (LET ((N (FIX:- END START))) - (AND (GUARANTEE-BUFFER-CHARS BUFFER N) - (,(intern (string-append "substring" suffix "=?")) - STRING START END - (PARSER-BUFFER-STRING BUFFER) - (PARSER-BUFFER-INDEX BUFFER) - (FIX:+ (PARSER-BUFFER-INDEX BUFFER) N)))))))) - (substring-matcher "") - (substring-matcher "-ci")) - -(define-integrable (increment-buffer-index! buffer char) - (set-parser-buffer-index! buffer (fix:+ (parser-buffer-index buffer) 1)) - (if (char=? char #\newline) - (set-parser-buffer-line! buffer (fix:+ (parser-buffer-line buffer) 1)))) - -(define (buffer-index+n! buffer n) - (let ((i (parser-buffer-index buffer)) - (s (parser-buffer-string buffer))) - (let ((j (fix:+ i n))) - (do ((i i (fix:+ i 1))) - ((fix:= i j)) - (if (char=? (string-ref s i) #\newline) - (set-parser-buffer-line! buffer - (fix:+ (parser-buffer-line buffer) 1)))) - (set-parser-buffer-index! buffer j)))) - -(define-integrable (guarantee-buffer-chars buffer n) - (or (fix:<= (fix:+ (parser-buffer-index buffer) n) - (parser-buffer-end buffer)) - (guarantee-buffer-chars-1 buffer n))) - -(define (guarantee-buffer-chars-1 buffer n) - (let ((min-end (fix:+ (parser-buffer-index buffer) n)) - (end (parser-buffer-end buffer))) - (and (not (parser-buffer-at-end? buffer)) - (begin - (let* ((string (parser-buffer-string buffer)) - (max-end (string-length string)) - (max-end* - (let loop ((max-end* max-end)) - (if (fix:<= min-end max-end*) - max-end* - (loop (fix:* max-end* 2)))))) - (if (fix:> max-end* max-end) - (let ((string* (make-string max-end*))) - (string-move! string string* 0) - (set-parser-buffer-string! buffer string*)))) - (let ((n-read - (let ((string (parser-buffer-string buffer))) - ((parser-buffer-source buffer) - string end (string-length string))))) - (if (fix:> n-read 0) - (let ((end (fix:+ end n-read))) - (set-parser-buffer-end! buffer end) - (fix:<= min-end end)) - (begin - (set-parser-buffer-at-end?! buffer #t) - #f))))))) - -(define (discard-parser-buffer-head! buffer) - ;; Tell the buffer that it is safe to discard all characters to the - ;; left of the current position. - (if (parser-buffer-source buffer) - (let ((string (parser-buffer-string buffer)) - (index (parser-buffer-index buffer)) - (end (parser-buffer-end buffer))) - (if (fix:< 0 index) - (let* ((end* (fix:- end index)) - (string* - (let ((n (string-length string))) - (if (and (fix:> n min-length) - (fix:<= end* (fix:quotient n 4))) - (make-string (fix:quotient n 2)) - string)))) - (without-interrupts - (lambda () - (substring-move! string index end string* 0) - (set-parser-buffer-string! buffer string*) - (set-parser-buffer-index! buffer 0) - (set-parser-buffer-end! buffer end*) - (set-parser-buffer-base-offset! - buffer - (+ (parser-buffer-base-offset buffer) index))))))) - (set-parser-buffer-start! buffer (parser-buffer-index buffer)))) \ No newline at end of file diff --git a/v7/src/star-parser/compile.scm b/v7/src/star-parser/compile.scm index 781d9f630..82e0c4045 100644 --- a/v7/src/star-parser/compile.scm +++ b/v7/src/star-parser/compile.scm @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: compile.scm,v 1.3 2001/10/01 05:20:36 cph Exp $ +;;; $Id: compile.scm,v 1.4 2001/11/11 06:00:08 cph Exp $ ;;; ;;; Copyright (c) 2001 Massachusetts Institute of Technology ;;; @@ -25,10 +25,8 @@ (with-working-directory-pathname (directory-pathname (current-load-pathname)) (lambda () (for-each compile-file - '("buffer" - "matcher" + '("matcher" "parser" "shared" - "synchk" - "unicode")) + "synchk")) (cref/generate-constructors "parser"))) \ No newline at end of file diff --git a/v7/src/star-parser/ed-ffi.scm b/v7/src/star-parser/ed-ffi.scm index 2f32e6904..d76d8b257 100644 --- a/v7/src/star-parser/ed-ffi.scm +++ b/v7/src/star-parser/ed-ffi.scm @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: ed-ffi.scm,v 1.3 2001/07/11 22:09:50 cph Exp $ +;;; $Id: ed-ffi.scm,v 1.4 2001/11/11 06:00:26 cph Exp $ ;;; ;;; Copyright (c) 2001 Massachusetts Institute of Technology ;;; @@ -22,15 +22,11 @@ ;;;; Parser language: Edwin buffer packaging info (standard-scheme-find-file-initialization - '#(("buffer" (runtime *parser buffer) - system-global-syntax-table) - ("matcher" (runtime *parser) + '#(("matcher" (runtime *parser) system-global-syntax-table) ("parser" (runtime *parser) system-global-syntax-table) ("shared" (runtime *parser) system-global-syntax-table) ("synchk" (runtime *parser) - system-global-syntax-table) - ("unicode" (runtime unicode) system-global-syntax-table))) \ No newline at end of file diff --git a/v7/src/star-parser/parser.pkg b/v7/src/star-parser/parser.pkg index d746e4e63..35a2bd24c 100644 --- a/v7/src/star-parser/parser.pkg +++ b/v7/src/star-parser/parser.pkg @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: parser.pkg,v 1.14 2001/10/04 16:51:20 cph Exp $ +;;; $Id: parser.pkg,v 1.15 2001/11/11 05:59:19 cph Exp $ ;;; ;;; Copyright (c) 2001 Massachusetts Institute of Technology ;;; @@ -23,47 +23,6 @@ (global-definitions "../runtime/runtime") -(define-package (runtime *parser buffer) - (files "buffer") - (parent ()) - (export () - discard-parser-buffer-head! - get-parser-buffer-pointer - get-parser-buffer-tail - input-port->parser-buffer - match-parser-buffer-char - match-parser-buffer-char-ci - match-parser-buffer-char-ci-no-advance - match-parser-buffer-char-in-set - match-parser-buffer-char-in-set-no-advance - match-parser-buffer-char-no-advance - match-parser-buffer-not-char - match-parser-buffer-not-char-ci - match-parser-buffer-not-char-ci-no-advance - match-parser-buffer-not-char-no-advance - match-parser-buffer-string - match-parser-buffer-string-ci - match-parser-buffer-string-ci-no-advance - match-parser-buffer-string-no-advance - match-parser-buffer-substring - match-parser-buffer-substring-ci - match-parser-buffer-substring-ci-no-advance - match-parser-buffer-substring-no-advance - match-utf8-char-in-alphabet - parser-buffer-line - parser-buffer-pointer-index - parser-buffer-pointer-line - parser-buffer-pointer? - parser-buffer-position-string - parser-buffer-ref - parser-buffer? - peek-parser-buffer-char - read-parser-buffer-char - set-parser-buffer-pointer! - source->parser-buffer - string->parser-buffer - substring->parser-buffer)) - (define-package (runtime *parser) (files "synchk" "shared" "matcher" "parser") (parent ()) @@ -75,28 +34,4 @@ make-parser-macros parser-macros? set-current-parser-macros! - with-current-parser-macros)) - -(define-package (runtime unicode) - (files "unicode") - (parent ()) - (export () - 8-bit-alphabet? - alphabet+ - alphabet- - alphabet->char-set - alphabet->code-points - alphabet->string - alphabet? - char-in-alphabet? - char-set->alphabet - code-point->utf8-string - code-point-in-alphabet? - code-points->alphabet - read-utf8-code-point - read-utf8-code-point-from-source - string->alphabet - unicode-code-point? - utf8-string->code-point - well-formed-code-points-list? - write-utf8-code-point)) \ No newline at end of file + with-current-parser-macros)) \ No newline at end of file -- 2.25.1