From: Chris Hanson Date: Sun, 11 Nov 2001 05:52:30 +0000 (+0000) Subject: Move parser-buffer abstraction into runtime system. X-Git-Tag: 20090517-FFI~2452 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=4d7a41d5c3ddbed9ab81235617e5f104294df687;p=mit-scheme.git Move parser-buffer abstraction into runtime system. --- diff --git a/v7/src/runtime/ed-ffi.scm b/v7/src/runtime/ed-ffi.scm index 9f0f28c8e..1e31dda8c 100644 --- a/v7/src/runtime/ed-ffi.scm +++ b/v7/src/runtime/ed-ffi.scm @@ -1,6 +1,6 @@ #| -*- Scheme -*- -$Id: ed-ffi.scm,v 1.28 2001/11/05 21:24:26 cph Exp $ +$Id: ed-ffi.scm,v 1.29 2001/11/11 05:52:30 cph Exp $ Copyright (c) 1988-2001 Massachusetts Institute of Technology @@ -192,6 +192,8 @@ USA. syntax-table/system-internal) ("parse" (runtime parser) syntax-table/system-internal) + ("parser-buffer" (runtime parser-buffer) + syntax-table/system-internal) ("partab" (runtime parser-table) syntax-table/system-internal) ("pathnm" (runtime pathname) diff --git a/v7/src/runtime/parser-buffer.scm b/v7/src/runtime/parser-buffer.scm new file mode 100644 index 000000000..b4a64df73 --- /dev/null +++ b/v7/src/runtime/parser-buffer.scm @@ -0,0 +1,310 @@ +;;; -*-Scheme-*- +;;; +;;; $Id: parser-buffer.scm,v 1.1 2001/11/11 05:51:13 cph Exp $ +;;; +;;; Copyright (c) 2001 Massachusetts Institute of Technology +;;; +;;; This program is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation; either version 2 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +;;; 02111-1307, USA. + +;;;; Parser-buffer abstraction + +(declare (usual-integrations)) + +;;;; Parser buffer abstraction + +(define-structure parser-buffer + ;; The string buffer, as a substring: + string + start + end + ;; The offset of the string buffer within the character stream. + ;; This is always zero if SOURCE is #F. + base-offset + ;; Our current position in the buffer. + index + ;; A procedure that is used to replenish the buffer when the + ;; buffered characters are used up. The procedure takes three + ;; arguments, (STRING START END), and attempts to fill the + ;; corresponding substring, returning the number of characters + ;; actually written. If SOURCE is #F, the buffered characters are + ;; the entire stream. + source + ;; True if there are no more characters past END. + at-end? + ;; The number of newlines to the left of the current position. + line) + +;;; The two basic kinds of buffers: substring and source. A substring +;;; buffer is one that reads from a pre-filled substring. A source +;;; buffer is one that reads from an unbuffered source of unbounded +;;; length. + +(define (substring->parser-buffer string start end) + (make-parser-buffer string start end 0 start #f #t 0)) + +(define (source->parser-buffer source) + (make-parser-buffer (make-string min-length) 0 0 0 0 source #f 0)) + +(define-integrable min-length 256) + +(define (string->parser-buffer string) + (substring->parser-buffer string 0 (string-length string))) + +(define (input-port->parser-buffer port) + (source->parser-buffer + (lambda (string start end) + (read-substring! string start end port)))) + +(define-structure parser-buffer-pointer + (index #f read-only #t) + (line #f read-only #t)) + +(define (get-parser-buffer-pointer buffer) + ;; Get an object that represents the current position. + (make-parser-buffer-pointer (+ (parser-buffer-base-offset buffer) + (parser-buffer-index buffer)) + (parser-buffer-line buffer))) + +(define (set-parser-buffer-pointer! buffer p) + ;; Move the current position to P, which must be an object that was + ;; previously returned by GET-PARSER-BUFFER-POINTER. The position + ;; may only be moved to the left. + (set-parser-buffer-index! buffer (pointer->index p buffer)) + (set-parser-buffer-line! buffer (parser-buffer-pointer-line p))) + +(define (get-parser-buffer-tail buffer p) + ;; P must be a buffer pointer previously returned by + ;; GET-PARSER-BUFFER-POINTER. Return the string of characters + ;; between P and the current buffer pointer. + (substring (parser-buffer-string buffer) + (pointer->index p buffer) + (parser-buffer-index buffer))) + +(define (pointer->index p buffer) + (if (parser-buffer-pointer? p) + (let ((p* + (- (parser-buffer-pointer-index p) + (parser-buffer-base-offset buffer)))) + (if (<= (parser-buffer-start buffer) p* (parser-buffer-index buffer)) + p* + (error:bad-range-argument p 'POINTER->INDEX))) + (error:wrong-type-argument p "parser-buffer pointer" 'POINTER->INDEX))) + +(define (parser-buffer-position-string object) + (let ((pointer + (if (parser-buffer-pointer? object) + object + (get-parser-buffer-pointer object)))) + (string-append + "line " + (number->string (+ (parser-buffer-pointer-line pointer) 1)) + ", char " + (number->string (+ (parser-buffer-pointer-index pointer) 1))))) + +(define (read-parser-buffer-char buffer) + ;; Attempt to read the next character from BUFFER, starting at the + ;; current position. If there is a character available, increment + ;; the position and return the character. If there are no more + ;; characters available, return #F and leave the position unchanged. + (and (guarantee-buffer-chars buffer 1) + (let ((char + (string-ref (parser-buffer-string buffer) + (parser-buffer-index buffer)))) + (increment-buffer-index! buffer char) + char))) + +(define (peek-parser-buffer-char buffer) + ;; Attempt to read the next character from BUFFER, starting at the + ;; current position. If there is a character available, return it, + ;; otherwise return #F. The position is unaffected in either case. + (and (guarantee-buffer-chars buffer 1) + (string-ref (parser-buffer-string buffer) + (parser-buffer-index buffer)))) + +(define (parser-buffer-ref buffer index) + (if (not (index-fixnum? index)) + (error:wrong-type-argument index "index" 'PARSER-BUFFER-REF)) + (and (guarantee-buffer-chars buffer (fix:+ index 1)) + (string-ref (parser-buffer-string buffer) + (fix:+ (parser-buffer-index buffer) index)))) + +(let-syntax + ((char-matcher + (lambda (name test) + `(BEGIN + (DEFINE (,(symbol-append 'MATCH-PARSER-BUFFER- name '-NO-ADVANCE) + BUFFER REFERENCE) + (AND (GUARANTEE-BUFFER-CHARS BUFFER 1) + (LET ((CHAR + (STRING-REF (PARSER-BUFFER-STRING BUFFER) + (PARSER-BUFFER-INDEX BUFFER)))) + (DECLARE (INTEGRATE CHAR)) + ,test))) + (DEFINE (,(symbol-append 'MATCH-PARSER-BUFFER- name) + BUFFER REFERENCE) + (AND (GUARANTEE-BUFFER-CHARS BUFFER 1) + (LET ((CHAR + (STRING-REF (PARSER-BUFFER-STRING BUFFER) + (PARSER-BUFFER-INDEX BUFFER)))) + (AND ,test + (BEGIN + (INCREMENT-BUFFER-INDEX! BUFFER CHAR) + #T))))))))) + (char-matcher char (char=? char reference)) + (char-matcher char-ci (char-ci=? char reference)) + (char-matcher not-char (not (char=? char reference))) + (char-matcher not-char-ci (not (char-ci=? char reference))) + (char-matcher char-in-set (char-set-member? reference char))) + +(define (match-utf8-char-in-alphabet buffer alphabet) + (let ((p (get-parser-buffer-pointer buffer))) + (if (let ((n + (read-utf8-code-point-from-source + (lambda () + (read-parser-buffer-char buffer))))) + (and n + (code-point-in-alphabet? n alphabet))) + #t + (begin + (set-parser-buffer-pointer! buffer p) + #f)))) + +(let-syntax + ((string-matcher + (lambda (suffix) + (let ((name + (intern (string-append "match-parser-buffer-string" suffix))) + (match-substring + (intern + (string-append "match-parser-buffer-substring" suffix)))) + `(DEFINE (,name BUFFER STRING) + (,match-substring BUFFER STRING 0 (STRING-LENGTH STRING))))))) + (string-matcher "") + (string-matcher "-ci") + (string-matcher "-no-advance") + (string-matcher "-ci-no-advance")) + +(let-syntax + ((substring-matcher + (lambda (suffix) + `(DEFINE (,(intern + (string-append "match-parser-buffer-substring" suffix)) + BUFFER STRING START END) + (LET ((N (FIX:- END START))) + (AND (GUARANTEE-BUFFER-CHARS BUFFER N) + (,(intern (string-append "substring" suffix "=?")) + STRING START END + (PARSER-BUFFER-STRING BUFFER) + (PARSER-BUFFER-INDEX BUFFER) + (FIX:+ (PARSER-BUFFER-INDEX BUFFER) N)) + (BEGIN + (BUFFER-INDEX+N! BUFFER N) + #T))))))) + (substring-matcher "") + (substring-matcher "-ci")) + +(let-syntax + ((substring-matcher + (lambda (suffix) + `(DEFINE (,(intern + (string-append "match-parser-buffer-substring" + suffix + "-no-advance")) + BUFFER STRING START END) + (LET ((N (FIX:- END START))) + (AND (GUARANTEE-BUFFER-CHARS BUFFER N) + (,(intern (string-append "substring" suffix "=?")) + STRING START END + (PARSER-BUFFER-STRING BUFFER) + (PARSER-BUFFER-INDEX BUFFER) + (FIX:+ (PARSER-BUFFER-INDEX BUFFER) N)))))))) + (substring-matcher "") + (substring-matcher "-ci")) + +(define-integrable (increment-buffer-index! buffer char) + (set-parser-buffer-index! buffer (fix:+ (parser-buffer-index buffer) 1)) + (if (char=? char #\newline) + (set-parser-buffer-line! buffer (fix:+ (parser-buffer-line buffer) 1)))) + +(define (buffer-index+n! buffer n) + (let ((i (parser-buffer-index buffer)) + (s (parser-buffer-string buffer))) + (let ((j (fix:+ i n))) + (do ((i i (fix:+ i 1))) + ((fix:= i j)) + (if (char=? (string-ref s i) #\newline) + (set-parser-buffer-line! buffer + (fix:+ (parser-buffer-line buffer) 1)))) + (set-parser-buffer-index! buffer j)))) + +(define-integrable (guarantee-buffer-chars buffer n) + (or (fix:<= (fix:+ (parser-buffer-index buffer) n) + (parser-buffer-end buffer)) + (guarantee-buffer-chars-1 buffer n))) + +(define (guarantee-buffer-chars-1 buffer n) + (let ((min-end (fix:+ (parser-buffer-index buffer) n)) + (end (parser-buffer-end buffer))) + (and (not (parser-buffer-at-end? buffer)) + (begin + (let* ((string (parser-buffer-string buffer)) + (max-end (string-length string)) + (max-end* + (let loop ((max-end* max-end)) + (if (fix:<= min-end max-end*) + max-end* + (loop (fix:* max-end* 2)))))) + (if (fix:> max-end* max-end) + (let ((string* (make-string max-end*))) + (string-move! string string* 0) + (set-parser-buffer-string! buffer string*)))) + (let ((n-read + (let ((string (parser-buffer-string buffer))) + ((parser-buffer-source buffer) + string end (string-length string))))) + (if (fix:> n-read 0) + (let ((end (fix:+ end n-read))) + (set-parser-buffer-end! buffer end) + (fix:<= min-end end)) + (begin + (set-parser-buffer-at-end?! buffer #t) + #f))))))) + +(define (discard-parser-buffer-head! buffer) + ;; Tell the buffer that it is safe to discard all characters to the + ;; left of the current position. + (if (parser-buffer-source buffer) + (let ((string (parser-buffer-string buffer)) + (index (parser-buffer-index buffer)) + (end (parser-buffer-end buffer))) + (if (fix:< 0 index) + (let* ((end* (fix:- end index)) + (string* + (let ((n (string-length string))) + (if (and (fix:> n min-length) + (fix:<= end* (fix:quotient n 4))) + (make-string (fix:quotient n 2)) + string)))) + (without-interrupts + (lambda () + (substring-move! string index end string* 0) + (set-parser-buffer-string! buffer string*) + (set-parser-buffer-index! buffer 0) + (set-parser-buffer-end! buffer end*) + (set-parser-buffer-base-offset! + buffer + (+ (parser-buffer-base-offset buffer) index))))))) + (set-parser-buffer-start! buffer (parser-buffer-index buffer)))) \ No newline at end of file diff --git a/v7/src/runtime/runtime.pkg b/v7/src/runtime/runtime.pkg index 26b8837c5..25db79b66 100644 --- a/v7/src/runtime/runtime.pkg +++ b/v7/src/runtime/runtime.pkg @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: runtime.pkg,v 14.381 2001/11/05 21:24:29 cph Exp $ +$Id: runtime.pkg,v 14.382 2001/11/11 05:51:51 cph Exp $ Copyright (c) 1988-2001 Massachusetts Institute of Technology @@ -3757,4 +3757,45 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA encode-quoted-printable:update make-decode-base64-port make-decode-binhex40-port - make-decode-quoted-printable-port)) \ No newline at end of file + make-decode-quoted-printable-port)) + +(define-package (runtime parser-buffer) + (files "parser-buffer") + (parent ()) + (export () + discard-parser-buffer-head! + get-parser-buffer-pointer + get-parser-buffer-tail + input-port->parser-buffer + match-parser-buffer-char + match-parser-buffer-char-ci + match-parser-buffer-char-ci-no-advance + match-parser-buffer-char-in-set + match-parser-buffer-char-in-set-no-advance + match-parser-buffer-char-no-advance + match-parser-buffer-not-char + match-parser-buffer-not-char-ci + match-parser-buffer-not-char-ci-no-advance + match-parser-buffer-not-char-no-advance + match-parser-buffer-string + match-parser-buffer-string-ci + match-parser-buffer-string-ci-no-advance + match-parser-buffer-string-no-advance + match-parser-buffer-substring + match-parser-buffer-substring-ci + match-parser-buffer-substring-ci-no-advance + match-parser-buffer-substring-no-advance + match-utf8-char-in-alphabet + parser-buffer-line + parser-buffer-pointer-index + parser-buffer-pointer-line + parser-buffer-pointer? + parser-buffer-position-string + parser-buffer-ref + parser-buffer? + peek-parser-buffer-char + read-parser-buffer-char + set-parser-buffer-pointer! + source->parser-buffer + string->parser-buffer + substring->parser-buffer)) \ No newline at end of file