+++ /dev/null
-;;; -*-Scheme-*-
-;;;
-;;; $Id: buffer.scm,v 1.11 2001/10/04 16:59:56 cph Exp $
-;;;
-;;; Copyright (c) 2001 Massachusetts Institute of Technology
-;;;
-;;; This program is free software; you can redistribute it and/or
-;;; modify it under the terms of the GNU General Public License as
-;;; published by the Free Software Foundation; either version 2 of the
-;;; License, or (at your option) any later version.
-;;;
-;;; This program is distributed in the hope that it will be useful,
-;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-;;; General Public License for more details.
-;;;
-;;; You should have received a copy of the GNU General Public License
-;;; along with this program; if not, write to the Free Software
-;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-;;; 02111-1307, USA.
-
-;;;; Parser-buffer abstraction
-
-(declare (usual-integrations))
-\f
-;;;; Parser buffer abstraction
-
-(define-structure parser-buffer
- ;; The string buffer, as a substring:
- string
- start
- end
- ;; The offset of the string buffer within the character stream.
- ;; This is always zero if SOURCE is #F.
- base-offset
- ;; Our current position in the buffer.
- index
- ;; A procedure that is used to replenish the buffer when the
- ;; buffered characters are used up. The procedure takes three
- ;; arguments, (STRING START END), and attempts to fill the
- ;; corresponding substring, returning the number of characters
- ;; actually written. If SOURCE is #F, the buffered characters are
- ;; the entire stream.
- source
- ;; True if there are no more characters past END.
- at-end?
- ;; The number of newlines to the left of the current position.
- line)
-
-;;; The two basic kinds of buffers: substring and source. A substring
-;;; buffer is one that reads from a pre-filled substring. A source
-;;; buffer is one that reads from an unbuffered source of unbounded
-;;; length.
-
-(define (substring->parser-buffer string start end)
- (make-parser-buffer string start end 0 start #f #t 0))
-
-(define (source->parser-buffer source)
- (make-parser-buffer (make-string min-length) 0 0 0 0 source #f 0))
-
-(define-integrable min-length 256)
-
-(define (string->parser-buffer string)
- (substring->parser-buffer string 0 (string-length string)))
-
-(define (input-port->parser-buffer port)
- (source->parser-buffer
- (lambda (string start end)
- (read-substring! string start end port))))
-
-(define-structure parser-buffer-pointer
- (index #f read-only #t)
- (line #f read-only #t))
-\f
-(define (get-parser-buffer-pointer buffer)
- ;; Get an object that represents the current position.
- (make-parser-buffer-pointer (+ (parser-buffer-base-offset buffer)
- (parser-buffer-index buffer))
- (parser-buffer-line buffer)))
-
-(define (set-parser-buffer-pointer! buffer p)
- ;; Move the current position to P, which must be an object that was
- ;; previously returned by GET-PARSER-BUFFER-POINTER. The position
- ;; may only be moved to the left.
- (set-parser-buffer-index! buffer (pointer->index p buffer))
- (set-parser-buffer-line! buffer (parser-buffer-pointer-line p)))
-
-(define (get-parser-buffer-tail buffer p)
- ;; P must be a buffer pointer previously returned by
- ;; GET-PARSER-BUFFER-POINTER. Return the string of characters
- ;; between P and the current buffer pointer.
- (substring (parser-buffer-string buffer)
- (pointer->index p buffer)
- (parser-buffer-index buffer)))
-
-(define (pointer->index p buffer)
- (if (parser-buffer-pointer? p)
- (let ((p*
- (- (parser-buffer-pointer-index p)
- (parser-buffer-base-offset buffer))))
- (if (<= (parser-buffer-start buffer) p* (parser-buffer-index buffer))
- p*
- (error:bad-range-argument p 'POINTER->INDEX)))
- (error:wrong-type-argument p "parser-buffer pointer" 'POINTER->INDEX)))
-
-(define (parser-buffer-position-string object)
- (let ((pointer
- (if (parser-buffer-pointer? object)
- object
- (get-parser-buffer-pointer object))))
- (string-append
- "line "
- (number->string (+ (parser-buffer-pointer-line pointer) 1))
- ", char "
- (number->string (+ (parser-buffer-pointer-index pointer) 1)))))
-
-(define (read-parser-buffer-char buffer)
- ;; Attempt to read the next character from BUFFER, starting at the
- ;; current position. If there is a character available, increment
- ;; the position and return the character. If there are no more
- ;; characters available, return #F and leave the position unchanged.
- (and (guarantee-buffer-chars buffer 1)
- (let ((char
- (string-ref (parser-buffer-string buffer)
- (parser-buffer-index buffer))))
- (increment-buffer-index! buffer char)
- char)))
-
-(define (peek-parser-buffer-char buffer)
- ;; Attempt to read the next character from BUFFER, starting at the
- ;; current position. If there is a character available, return it,
- ;; otherwise return #F. The position is unaffected in either case.
- (and (guarantee-buffer-chars buffer 1)
- (string-ref (parser-buffer-string buffer)
- (parser-buffer-index buffer))))
-
-(define (parser-buffer-ref buffer index)
- (if (not (index-fixnum? index))
- (error:wrong-type-argument index "index" 'PARSER-BUFFER-REF))
- (and (guarantee-buffer-chars buffer (fix:+ index 1))
- (string-ref (parser-buffer-string buffer)
- (fix:+ (parser-buffer-index buffer) index))))
-\f
-(let-syntax
- ((char-matcher
- (lambda (name test)
- `(BEGIN
- (DEFINE (,(symbol-append 'MATCH-PARSER-BUFFER- name '-NO-ADVANCE)
- BUFFER REFERENCE)
- (AND (GUARANTEE-BUFFER-CHARS BUFFER 1)
- (LET ((CHAR
- (STRING-REF (PARSER-BUFFER-STRING BUFFER)
- (PARSER-BUFFER-INDEX BUFFER))))
- (DECLARE (INTEGRATE CHAR))
- ,test)))
- (DEFINE (,(symbol-append 'MATCH-PARSER-BUFFER- name)
- BUFFER REFERENCE)
- (AND (GUARANTEE-BUFFER-CHARS BUFFER 1)
- (LET ((CHAR
- (STRING-REF (PARSER-BUFFER-STRING BUFFER)
- (PARSER-BUFFER-INDEX BUFFER))))
- (AND ,test
- (BEGIN
- (INCREMENT-BUFFER-INDEX! BUFFER CHAR)
- #T)))))))))
- (char-matcher char (char=? char reference))
- (char-matcher char-ci (char-ci=? char reference))
- (char-matcher not-char (not (char=? char reference)))
- (char-matcher not-char-ci (not (char-ci=? char reference)))
- (char-matcher char-in-set (char-set-member? reference char)))
-
-(define (match-utf8-char-in-alphabet buffer alphabet)
- (let ((p (get-parser-buffer-pointer buffer)))
- (if (let ((n
- (read-utf8-code-point-from-source
- (lambda ()
- (read-parser-buffer-char buffer)))))
- (and n
- (code-point-in-alphabet? n alphabet)))
- #t
- (begin
- (set-parser-buffer-pointer! buffer p)
- #f))))
-\f
-(let-syntax
- ((string-matcher
- (lambda (suffix)
- (let ((name
- (intern (string-append "match-parser-buffer-string" suffix)))
- (match-substring
- (intern
- (string-append "match-parser-buffer-substring" suffix))))
- `(DEFINE (,name BUFFER STRING)
- (,match-substring BUFFER STRING 0 (STRING-LENGTH STRING)))))))
- (string-matcher "")
- (string-matcher "-ci")
- (string-matcher "-no-advance")
- (string-matcher "-ci-no-advance"))
-
-(let-syntax
- ((substring-matcher
- (lambda (suffix)
- `(DEFINE (,(intern
- (string-append "match-parser-buffer-substring" suffix))
- BUFFER STRING START END)
- (LET ((N (FIX:- END START)))
- (AND (GUARANTEE-BUFFER-CHARS BUFFER N)
- (,(intern (string-append "substring" suffix "=?"))
- STRING START END
- (PARSER-BUFFER-STRING BUFFER)
- (PARSER-BUFFER-INDEX BUFFER)
- (FIX:+ (PARSER-BUFFER-INDEX BUFFER) N))
- (BEGIN
- (BUFFER-INDEX+N! BUFFER N)
- #T)))))))
- (substring-matcher "")
- (substring-matcher "-ci"))
-
-(let-syntax
- ((substring-matcher
- (lambda (suffix)
- `(DEFINE (,(intern
- (string-append "match-parser-buffer-substring"
- suffix
- "-no-advance"))
- BUFFER STRING START END)
- (LET ((N (FIX:- END START)))
- (AND (GUARANTEE-BUFFER-CHARS BUFFER N)
- (,(intern (string-append "substring" suffix "=?"))
- STRING START END
- (PARSER-BUFFER-STRING BUFFER)
- (PARSER-BUFFER-INDEX BUFFER)
- (FIX:+ (PARSER-BUFFER-INDEX BUFFER) N))))))))
- (substring-matcher "")
- (substring-matcher "-ci"))
-\f
-(define-integrable (increment-buffer-index! buffer char)
- (set-parser-buffer-index! buffer (fix:+ (parser-buffer-index buffer) 1))
- (if (char=? char #\newline)
- (set-parser-buffer-line! buffer (fix:+ (parser-buffer-line buffer) 1))))
-
-(define (buffer-index+n! buffer n)
- (let ((i (parser-buffer-index buffer))
- (s (parser-buffer-string buffer)))
- (let ((j (fix:+ i n)))
- (do ((i i (fix:+ i 1)))
- ((fix:= i j))
- (if (char=? (string-ref s i) #\newline)
- (set-parser-buffer-line! buffer
- (fix:+ (parser-buffer-line buffer) 1))))
- (set-parser-buffer-index! buffer j))))
-
-(define-integrable (guarantee-buffer-chars buffer n)
- (or (fix:<= (fix:+ (parser-buffer-index buffer) n)
- (parser-buffer-end buffer))
- (guarantee-buffer-chars-1 buffer n)))
-
-(define (guarantee-buffer-chars-1 buffer n)
- (let ((min-end (fix:+ (parser-buffer-index buffer) n))
- (end (parser-buffer-end buffer)))
- (and (not (parser-buffer-at-end? buffer))
- (begin
- (let* ((string (parser-buffer-string buffer))
- (max-end (string-length string))
- (max-end*
- (let loop ((max-end* max-end))
- (if (fix:<= min-end max-end*)
- max-end*
- (loop (fix:* max-end* 2))))))
- (if (fix:> max-end* max-end)
- (let ((string* (make-string max-end*)))
- (string-move! string string* 0)
- (set-parser-buffer-string! buffer string*))))
- (let ((n-read
- (let ((string (parser-buffer-string buffer)))
- ((parser-buffer-source buffer)
- string end (string-length string)))))
- (if (fix:> n-read 0)
- (let ((end (fix:+ end n-read)))
- (set-parser-buffer-end! buffer end)
- (fix:<= min-end end))
- (begin
- (set-parser-buffer-at-end?! buffer #t)
- #f)))))))
-
-(define (discard-parser-buffer-head! buffer)
- ;; Tell the buffer that it is safe to discard all characters to the
- ;; left of the current position.
- (if (parser-buffer-source buffer)
- (let ((string (parser-buffer-string buffer))
- (index (parser-buffer-index buffer))
- (end (parser-buffer-end buffer)))
- (if (fix:< 0 index)
- (let* ((end* (fix:- end index))
- (string*
- (let ((n (string-length string)))
- (if (and (fix:> n min-length)
- (fix:<= end* (fix:quotient n 4)))
- (make-string (fix:quotient n 2))
- string))))
- (without-interrupts
- (lambda ()
- (substring-move! string index end string* 0)
- (set-parser-buffer-string! buffer string*)
- (set-parser-buffer-index! buffer 0)
- (set-parser-buffer-end! buffer end*)
- (set-parser-buffer-base-offset!
- buffer
- (+ (parser-buffer-base-offset buffer) index)))))))
- (set-parser-buffer-start! buffer (parser-buffer-index buffer))))
\ No newline at end of file
;;; -*-Scheme-*-
;;;
-;;; $Id: parser.pkg,v 1.14 2001/10/04 16:51:20 cph Exp $
+;;; $Id: parser.pkg,v 1.15 2001/11/11 05:59:19 cph Exp $
;;;
;;; Copyright (c) 2001 Massachusetts Institute of Technology
;;;
(global-definitions "../runtime/runtime")
-(define-package (runtime *parser buffer)
- (files "buffer")
- (parent ())
- (export ()
- discard-parser-buffer-head!
- get-parser-buffer-pointer
- get-parser-buffer-tail
- input-port->parser-buffer
- match-parser-buffer-char
- match-parser-buffer-char-ci
- match-parser-buffer-char-ci-no-advance
- match-parser-buffer-char-in-set
- match-parser-buffer-char-in-set-no-advance
- match-parser-buffer-char-no-advance
- match-parser-buffer-not-char
- match-parser-buffer-not-char-ci
- match-parser-buffer-not-char-ci-no-advance
- match-parser-buffer-not-char-no-advance
- match-parser-buffer-string
- match-parser-buffer-string-ci
- match-parser-buffer-string-ci-no-advance
- match-parser-buffer-string-no-advance
- match-parser-buffer-substring
- match-parser-buffer-substring-ci
- match-parser-buffer-substring-ci-no-advance
- match-parser-buffer-substring-no-advance
- match-utf8-char-in-alphabet
- parser-buffer-line
- parser-buffer-pointer-index
- parser-buffer-pointer-line
- parser-buffer-pointer?
- parser-buffer-position-string
- parser-buffer-ref
- parser-buffer?
- peek-parser-buffer-char
- read-parser-buffer-char
- set-parser-buffer-pointer!
- source->parser-buffer
- string->parser-buffer
- substring->parser-buffer))
-
(define-package (runtime *parser)
(files "synchk" "shared" "matcher" "parser")
(parent ())
make-parser-macros
parser-macros?
set-current-parser-macros!
- with-current-parser-macros))
-
-(define-package (runtime unicode)
- (files "unicode")
- (parent ())
- (export ()
- 8-bit-alphabet?
- alphabet+
- alphabet-
- alphabet->char-set
- alphabet->code-points
- alphabet->string
- alphabet?
- char-in-alphabet?
- char-set->alphabet
- code-point->utf8-string
- code-point-in-alphabet?
- code-points->alphabet
- read-utf8-code-point
- read-utf8-code-point-from-source
- string->alphabet
- unicode-code-point?
- utf8-string->code-point
- well-formed-code-points-list?
- write-utf8-code-point))
\ No newline at end of file
+ with-current-parser-macros))
\ No newline at end of file