From: Chris Hanson Date: Tue, 26 Jun 2001 18:03:26 +0000 (+0000) Subject: Initial revision. X-Git-Tag: 20090517-FFI~2706 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=53936f2d083217d0a20ebce13a8e99be7376b1a8;p=mit-scheme.git Initial revision. --- diff --git a/v7/src/star-parser/buffer.scm b/v7/src/star-parser/buffer.scm new file mode 100644 index 000000000..1805c5ff2 --- /dev/null +++ b/v7/src/star-parser/buffer.scm @@ -0,0 +1,247 @@ +;;; -*-Scheme-*- +;;; +;;; $Id: buffer.scm,v 1.1 2001/06/26 18:03:09 cph Exp $ +;;; +;;; Copyright (c) 2001 Massachusetts Institute of Technology +;;; +;;; This program is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation; either version 2 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +;;; 02111-1307, USA. + +;;;; Parser-buffer abstraction + +(declare (usual-integrations)) + +;;;; Parser buffer abstraction + +(define-structure parser-buffer + ;; The string buffer, as a substring: + string + start + end + ;; The offset of the string buffer within the character stream. + ;; This is always zero if SOURCE is #F. + base-offset + ;; Our current position in the buffer. + index + ;; A procedure that is used to replenish the buffer when the + ;; buffered characters are used up. The procedure takes three + ;; arguments, (STRING START END), and attempts to fill the + ;; corresponding substring, returning the number of characters + ;; actually written. If SOURCE is #F, the buffered characters are + ;; the entire stream. + source + ;; True if there are no more characters past END. + at-end?) + +;;; The two basic kinds of buffers: substring and source. A substring +;;; buffer is one that reads from a pre-filled substring. A source +;;; buffer is one that reads from an unbuffered source of unbounded +;;; length. + +(define (substring->parser-buffer string start end) + (make-parser-buffer string start end 0 start #f #t)) + +(define (source->parser-buffer source) + (make-parser-buffer (make-string min-length) 0 0 0 0 source #f)) + +(define-integrable min-length 256) + +(define (string->parser-buffer string) + (substring->parser-buffer string 0 (string-length string))) + +(define (input-port->parser-buffer port) + (source->parser-buffer + (lambda (string start end) + (read-substring! string start end port)))) + +(define (get-parser-buffer-pointer buffer) + ;; Get an object that represents the current buffer pointer. + (+ (parser-buffer-base-offset buffer) + (parser-buffer-index buffer))) + +(define (set-parser-buffer-pointer! buffer p) + ;; Move the buffer pointer to the location represented by P. P must + ;; be an object that was previously returned by GET-PARSER-BUFFER-POINTER. + ;; The buffer pointer may only be moved to the left. + (let ((p* (- p (parser-buffer-base-offset buffer)))) + (if (not (<= (parser-buffer-start buffer) p* (parser-buffer-index buffer))) + (error:bad-range-argument p 'SET-PARSER-BUFFER-POINTER!)) + (set-parser-buffer-index! buffer p*))) + +(define (decrement-parser-buffer-pointer buffer) + ;; Decrement the buffer pointer by one. + (if (fix:< (parser-buffer-start buffer) (parser-buffer-index buffer)) + (set-parser-buffer-index! buffer (fix:- (parser-buffer-index buffer) 1)) + (error "Can't decrement buffer pointer:" buffer))) + +(define (get-parser-buffer-tail buffer p) + ;; P must be a buffer pointer previously returned by + ;; GET-PARSER-BUFFER-POINTER. Return the string of characters + ;; between P and the current buffer pointer. + (let ((p* (- p (parser-buffer-base-offset buffer)))) + (if (not (<= (parser-buffer-start buffer) p* (parser-buffer-index buffer))) + (error:bad-range-argument p 'GET-PARSER-BUFFER-TAIL)) + (substring (parser-buffer-string buffer) + p* + (parser-buffer-index buffer)))) + +(let-syntax + ((char-matcher + (lambda (name test) + `(DEFINE (,(symbol-append 'MATCH-PARSER-BUFFER- name) BUFFER REFERENCE) + (AND (LET ((CHAR (PEEK-PARSER-BUFFER-CHAR BUFFER))) + (AND CHAR + ,test)) + (BEGIN + (SET-PARSER-BUFFER-INDEX! BUFFER + (FIX:+ (PARSER-BUFFER-INDEX BUFFER) + 1)) + #T)))))) + (char-matcher char (char=? char reference)) + (char-matcher char-ci (char-ci=? char reference)) + (char-matcher not-char (not (char=? char reference))) + (char-matcher not-char-ci (not (char-ci=? char reference))) + (char-matcher char-in-set (char-set-member? reference char))) + +(let-syntax + ((string-matcher + (lambda (suffix) + (let ((name + (intern (string-append "match-parser-buffer-string" suffix))) + (match-substring + (intern + (string-append "match-parser-buffer-substring" suffix)))) + `(DEFINE (,name BUFFER STRING) + (,match-substring BUFFER STRING 0 (STRING-LENGTH STRING))))))) + (string-matcher "") + (string-matcher "-ci") + (string-matcher "-no-advance") + (string-matcher "-ci-no-advance")) + +(let-syntax + ((substring-matcher + (lambda (suffix) + `(DEFINE (,(intern + (string-append "match-parser-buffer-substring" suffix)) + BUFFER STRING START END) + (LET ((N (FIX:- END START))) + (AND (GUARANTEE-BUFFER-CHARS BUFFER N) + (,(intern (string-append "substring" suffix "=?")) + STRING START END + (PARSER-BUFFER-STRING BUFFER) + (PARSER-BUFFER-INDEX BUFFER) + (FIX:+ (PARSER-BUFFER-INDEX BUFFER) N)) + (BEGIN + (SET-PARSER-BUFFER-INDEX! + BUFFER + (FIX:+ (PARSER-BUFFER-INDEX BUFFER) N)) + #T))))))) + (substring-matcher "") + (substring-matcher "-ci")) + +(let-syntax + ((substring-matcher + (lambda (suffix) + `(DEFINE (,(intern + (string-append "match-parser-buffer-substring" + suffix + "-no-advance")) + BUFFER STRING START END) + (LET ((N (FIX:- END START))) + (AND (GUARANTEE-BUFFER-CHARS BUFFER N) + (,(intern (string-append "char" suffix "=?")) + STRING START END + (PARSER-BUFFER-STRING BUFFER) + (PARSER-BUFFER-INDEX BUFFER) + (FIX:+ (PARSER-BUFFER-INDEX BUFFER) N)))))))) + (substring-matcher "") + (substring-matcher "-ci")) + +(define (read-parser-buffer-char buffer) + ;; Attempt to read the next character from BUFFER, starting at the + ;; buffer pointer. If there is a character available, increment the + ;; buffer pointer and return the character. If there are no more + ;; characters available, return #F and leave the buffer pointer + ;; unchanged. + (let ((char (peek-parser-buffer-char buffer))) + (if char + (set-parser-buffer-index! buffer + (fix:+ (parser-buffer-index buffer) 1))) + char)) + +(define (peek-parser-buffer-char buffer) + ;; Attempt to read the next character from BUFFER, starting at the + ;; buffer pointer. If there is a character available, return it, + ;; otherwise return #F. The buffer pointer is unaffected in either + ;; case. + (and (guarantee-buffer-chars buffer 1) + (string-ref (parser-buffer-string buffer) + (parser-buffer-index buffer)))) + +(define (guarantee-buffer-chars buffer n) + (let ((min-end (fix:+ (parser-buffer-index buffer) n)) + (end (parser-buffer-end buffer))) + (or (fix:<= min-end end) + (and (not (parser-buffer-at-end? buffer)) + (begin + (let* ((string (parser-buffer-string buffer)) + (max-end (string-length string)) + (max-end* + (let loop ((max-end* max-end)) + (if (fix:<= min-end max-end*) + max-end* + (loop (fix:* max-end* 2)))))) + (if (fix:> max-end* max-end) + (let ((string* (make-string max-end*))) + (string-move! string string* 0) + (set-parser-buffer-string! buffer string*)))) + (let ((n-read + (let ((string (parser-buffer-string buffer))) + ((parser-buffer-source buffer) + string end (string-length string))))) + (if (fix:> n-read 0) + (let ((end (fix:+ end n-read))) + (set-parser-buffer-end! buffer end) + (fix:<= min-end end)) + (begin + (set-parser-buffer-at-end?! buffer #t) + #f)))))))) + +(define (discard-parser-buffer-head! buffer) + ;; Tell the buffer that it is safe to discard all characters to the + ;; left of the current buffer pointer. We promise not to backtrack + ;; from here, and the buffer is allowed to enforce the promise. + (if (parser-buffer-source buffer) + (let ((string (parser-buffer-string buffer)) + (index (parser-buffer-index buffer)) + (end (parser-buffer-end buffer))) + (if (fix:< 0 index) + (let* ((end* (fix:- end index)) + (string* + (let ((n (string-length string))) + (if (and (fix:> n min-length) + (fix:<= end* (fix:quotient n 4))) + (make-string (fix:quotient n 2)) + string)))) + (without-interrupts + (lambda () + (substring-move! string index end string* 0) + (set-parser-buffer-string! buffer string*) + (set-parser-buffer-index! buffer 0) + (set-parser-buffer-end! buffer end*) + (set-parser-buffer-base-offset! + buffer + (+ (parser-buffer-base-offset buffer) index))))))) + (set-parser-buffer-start! buffer (parser-buffer-index buffer)))) \ No newline at end of file diff --git a/v7/src/star-parser/compile.scm b/v7/src/star-parser/compile.scm new file mode 100644 index 000000000..4f9326c27 --- /dev/null +++ b/v7/src/star-parser/compile.scm @@ -0,0 +1,35 @@ +;;; -*-Scheme-*- +;;; +;;; $Id: compile.scm,v 1.1 2001/06/26 18:03:11 cph Exp $ +;;; +;;; Copyright (c) 2001 Massachusetts Institute of Technology +;;; +;;; This program is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation; either version 2 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +;;; 02111-1307, USA. + +;;;; Parser language: compilation + +(load-option 'CREF) +(with-working-directory-pathname (directory-pathname (current-load-pathname)) + (lambda () + (for-each compile-file + '("buffer" + "matcher" + "parser" + "shared" + "synchk")) + (cref/generate-constructors "parser") + (sf "parser.con") + (sf "parser.ldr"))) \ No newline at end of file diff --git a/v7/src/star-parser/load.scm b/v7/src/star-parser/load.scm new file mode 100644 index 000000000..19f4bf80c --- /dev/null +++ b/v7/src/star-parser/load.scm @@ -0,0 +1,27 @@ +;;; -*-Scheme-*- +;;; +;;; $Id: load.scm,v 1.1 2001/06/26 18:03:13 cph Exp $ +;;; +;;; Copyright (c) 2001 Massachusetts Institute of Technology +;;; +;;; This program is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation; either version 2 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +;;; 02111-1307, USA. + +(load-option 'HASH-TABLE) +(with-working-directory-pathname (directory-pathname (current-load-pathname)) + (lambda () + (fluid-let ((*allow-package-redefinition?* #t)) + (package/system-loader "parser" '() 'QUERY)))) +(add-subsystem-identification! "*Parser" '(0 1)) \ No newline at end of file diff --git a/v7/src/star-parser/matcher.scm b/v7/src/star-parser/matcher.scm new file mode 100644 index 000000000..bd9a78f1a --- /dev/null +++ b/v7/src/star-parser/matcher.scm @@ -0,0 +1,263 @@ +;;; -*-Scheme-*- +;;; +;;; $Id: matcher.scm,v 1.1 2001/06/26 18:03:15 cph Exp $ +;;; +;;; Copyright (c) 2001 Massachusetts Institute of Technology +;;; +;;; This program is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation; either version 2 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +;;; 02111-1307, USA. + +;;;; Pattern-matcher language + +(declare (usual-integrations)) + +;;; A matcher is a procedure of one argument, a parser buffer. +;;; It performs a match against the contents of the buffer, starting +;;; at the location of the buffer pointer. If the match is +;;; successful, the buffer pointer is advanced to the end of the +;;; matched segment, and #T is returned. If the match fails, the +;;; buffer pointer is unchanged, and #F is returned. + +;;; The *MATCHER macro provides a concise way to define a broad class +;;; of matchers using a BNF-like syntax. + +(syntax-table/define system-global-syntax-table '*MATCHER + (lambda (expression) + (optimize-expression (generate-matcher-code expression)))) + +(define (generate-matcher-code expression) + (with-buffer-name + (lambda () + (with-canonical-matcher-expression expression + (lambda (expression) + (compile-matcher-expression + expression + (no-pointers) + (simple-backtracking-continuation `#T) + (simple-backtracking-continuation `#F))))))) + +;; COMPILE-MATCHER is called by the parser compiler, to generate code +;; to be embedded into a parser. + +(define (compile-matcher expression pointers if-succeed if-fail) + (with-canonical-matcher-expression expression + (lambda (expression) + (compile-matcher-expression expression pointers if-succeed if-fail)))) + +(define (compile-matcher-expression expression pointers if-succeed if-fail) + (cond ((and (pair? expression) + (symbol? (car expression)) + (list? (cdr expression)) + (hash-table/get matcher-compilers (car expression) #f)) + => (lambda (entry) + (let ((arity (car entry)) + (compiler (cdr entry))) + (if (and arity (not (= (length (cdr expression)) arity))) + (error "Incorrect arity for matcher:" expression)) + (apply compiler pointers if-succeed if-fail + (if arity + (cdr expression) + (list (cdr expression))))))) + ((symbol? expression) + (handle-pending-backtracking pointers + (lambda (pointers) + `(IF (,expression ,*buffer-name*) + ,(if-succeed (unknown-location pointers)) + ,(if-fail pointers))))) + (else + (error "Malformed matcher:" expression)))) + +;;;; Canonicalization + +(define (with-canonical-matcher-expression expression receiver) + (let ((bindings '())) + (define (do-expression expression) + (cond ((and (pair? expression) + (symbol? (car expression)) + (list? (cdr expression))) + (case (car expression) + ((ALT SEQ) + `(,(car expression) + ,@(flatten-expressions (map do-expression (cdr expression)) + (car expression)))) + ((*) + `(,(car expression) + ,(do-expression (check-1-arg expression)))) + ((+) + (do-expression + (let ((expression (check-1-arg expression))) + `(SEQ ,expression (* ,expression))))) + ((?) + (do-expression + `(ALT ,(check-1-arg expression) (SEQ)))) + ((CHAR CHAR-CI NOT-CHAR NOT-CHAR-CI) + `(,(car expression) + ,(handle-complex-expression (check-1-arg expression)))) + ((STRING) + (let ((string (check-1-arg expression))) + (if (and (string? string) (fix:= (string-length string) 1)) + `(CHAR ,(string-ref string 0)) + `(STRING ,(handle-complex-expression string))))) + ((STRING-CI) + (let ((string (check-1-arg expression))) + (if (and (string? string) (fix:= (string-length string) 1)) + `(CHAR-CI ,(string-ref string 0)) + `(STRING-CI ,(handle-complex-expression string))))) + ((ALPHABET) + `(,(car expression) + ,(handle-complex-expression + (let ((arg (check-1-arg expression))) + (if (string? arg) + (if (string-prefix? "^" arg) + `(RE-COMPILE-CHAR-SET ,(string-tail arg 1) #T) + `(RE-COMPILE-CHAR-SET ,arg #F)) + arg))))) + ((MATCHER) + (handle-complex-expression (check-1-arg expression))) + (else + (error "Unknown matcher expression:" expression)))) + ((symbol? expression) + expression) + (else + (error "Unknown matcher expression:" expression)))) + + (define (check-1-arg expression) + (if (and (pair? (cdr expression)) + (null? (cddr expression))) + (cadr expression) + (error "Malformed expression:" expression))) + + (define (handle-complex-expression expression) + (if (or (char? expression) + (string? expression) + (symbol? expression)) + expression + (let loop ((bindings* bindings)) + (if (pair? bindings*) + (if (equal? expression (caar bindings*)) + (cdar bindings*) + (loop (cdr bindings*))) + (let ((variable (generate-uninterned-symbol))) + (set! bindings (cons (cons expression variable) bindings)) + variable))))) + + (let ((expression (do-expression expression))) + (if (pair? bindings) + `(LET ,(map (lambda (b) `(,(cdr b) ,(car b))) bindings) + ,(receiver expression)) + (receiver expression))))) + +;;;; Matchers + +(define-macro (define-matcher form compiler-body) + (let ((name (car form)) + (parameters (cdr form))) + (if (symbol? parameters) + `(DEFINE-MATCHER-COMPILER ',name #F + (LAMBDA (POINTERS IF-SUCCEED IF-FAIL ,parameters) + ,compiler-body)) + `(DEFINE-MATCHER-COMPILER ',name ,(length parameters) + (LAMBDA (POINTERS IF-SUCCEED IF-FAIL ,@parameters) + ,compiler-body))))) + +(define (define-matcher-compiler keyword arity compiler) + (hash-table/put! matcher-compilers keyword (cons arity compiler)) + keyword) + +(define matcher-compilers + (make-eq-hash-table)) + +(define-macro (define-atomic-matcher form test-expression) + `(DEFINE-MATCHER ,form + (HANDLE-PENDING-BACKTRACKING POINTERS + (LAMBDA (POINTERS) + `(IF ,,test-expression + ,(IF-SUCCEED (UNKNOWN-LOCATION POINTERS)) + ,(IF-FAIL POINTERS)))))) + +(define-atomic-matcher (char char) + `(MATCH-PARSER-BUFFER-CHAR ,*buffer-name* ,char)) + +(define-atomic-matcher (char-ci char) + `(MATCH-PARSER-BUFFER-CHAR-CI ,*buffer-name* ,char)) + +(define-atomic-matcher (not-char char) + `(MATCH-PARSER-BUFFER-NOT-CHAR ,*buffer-name* ,char)) + +(define-atomic-matcher (not-char-ci char) + `(MATCH-PARSER-BUFFER-NOT-CHAR-CI ,*buffer-name* ,char)) + +(define-atomic-matcher (alphabet alphabet) + `(MATCH-PARSER-BUFFER-CHAR-IN-SET ,*buffer-name* ,alphabet)) + +(define-atomic-matcher (string string) + `(MATCH-PARSER-BUFFER-STRING ,*buffer-name* ,string)) + +(define-atomic-matcher (string-ci string) + `(MATCH-PARSER-BUFFER-STRING-CI ,*buffer-name* ,string)) + +(define-matcher (* expression) + (handle-pending-backtracking pointers + (lambda (pointers) + (let ((v (generate-uninterned-symbol))) + `(BEGIN + (LET ,v () + ,(compile-matcher-expression expression (no-pointers) + (simple-backtracking-continuation `(,v)) + (simple-backtracking-continuation `UNSPECIFIC))) + ,(if-succeed (no-pointers))))))) + +(define-matcher (seq . expressions) + (with-current-pointer pointers + (lambda (start-pointers) + (let loop + ((expressions expressions) + (pointers start-pointers)) + (if (pair? expressions) + (compile-matcher-expression (car expressions) + pointers + (lambda (pointers) + (loop (cdr expressions) pointers)) + (lambda (pointers) + (if-fail + (new-backtrack-pointer + start-pointers pointers)))) + (if-succeed pointers)))))) + +(define-matcher (alt . expressions) + (with-current-pointer pointers + (lambda (pointers) + (let loop ((expressions expressions)) + (if (pair? expressions) + (let ((predicate + (compile-matcher-expression + (car expressions) + pointers + (simple-backtracking-continuation '#T) + (simple-backtracking-continuation '#F))) + (consequent + (lambda () (if-succeed (unknown-location pointers)))) + (alternative + (lambda () (loop (cdr expressions))))) + (cond ((eq? predicate '#T) (consequent)) + ((eq? predicate '#F) (alternative)) + (else `(IF ,predicate ,(consequent) ,(alternative))))) + (if-fail pointers)))))) + +;;; Edwin Variables: +;;; Eval: (scheme-indent-method 'handle-pending-backtracking 1) +;;; Eval: (scheme-indent-method 'define-matcher-optimizer 2) +;;; Eval: (scheme-indent-method 'with-buffer-name 0) +;;; End: diff --git a/v7/src/star-parser/parser.pkg b/v7/src/star-parser/parser.pkg new file mode 100644 index 000000000..3af82813e --- /dev/null +++ b/v7/src/star-parser/parser.pkg @@ -0,0 +1,51 @@ +;;; -*-Scheme-*- +;;; +;;; $Id: parser.pkg,v 1.1 2001/06/26 18:03:18 cph Exp $ +;;; +;;; Copyright (c) 2001 Massachusetts Institute of Technology +;;; +;;; This program is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation; either version 2 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +;;; 02111-1307, USA. + +;;;; Parser language: packaging + +(global-definitions "$brun/runtime") + +(define-package (runtime *parser) + (files "buffer" "synchk" "shared" "matcher" "parser") + (parent ()) + (export () + decrement-parser-buffer-pointer + discard-parser-buffer-head! + input-port->parser-buffer + match-parser-buffer-char + match-parser-buffer-char-ci + match-parser-buffer-char-in-set + match-parser-buffer-not-char + match-parser-buffer-not-char-ci + match-parser-buffer-string + match-parser-buffer-string-ci + match-parser-buffer-string-ci-no-advance + match-parser-buffer-string-no-advance + match-parser-buffer-substring + match-parser-buffer-substring-ci + match-parser-buffer-substring-ci-no-advance + match-parser-buffer-substring-no-advance + parser-buffer? + peek-parser-buffer-char + read-parser-buffer-char + source->parser-buffer + string->parser-buffer + substring->parser-buffer)) \ No newline at end of file diff --git a/v7/src/star-parser/parser.scm b/v7/src/star-parser/parser.scm new file mode 100644 index 000000000..fe187b45f --- /dev/null +++ b/v7/src/star-parser/parser.scm @@ -0,0 +1,294 @@ +;;; -*-Scheme-*- +;;; +;;; $Id: parser.scm,v 1.1 2001/06/26 18:03:20 cph Exp $ +;;; +;;; Copyright (c) 2001 Massachusetts Institute of Technology +;;; +;;; This program is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation; either version 2 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +;;; 02111-1307, USA. + +;;;; Parser language + +(declare (usual-integrations)) + +;;; A parser is a procedure of one argument, a parser buffer. It +;;; attempts to parse the contents of the buffer, starting at the +;;; location of the buffer pointer. If the parse is successful, the +;;; buffer pointer is advanced to the end of the parsed segment, and a +;;; vector of results is returned. If the parse fails, the buffer +;;; pointer is unchanged, and #F is returned. + +;;; The *PARSER macro provides a concise way to define a broad class +;;; of parsers using a BNF-like syntax. + +(syntax-table/define system-global-syntax-table '*PARSER + (lambda (expression) + (optimize-expression (generate-parser-code expression)))) + +(define (generate-parser-code expression) + (with-buffer-name + (lambda () + (with-canonical-parser-expression expression + (lambda (expression) + (compile-parser-expression + expression + (no-pointers) + (lambda (pointers result) + (handle-pending-backtracking pointers + (lambda (pointers) + pointers + result))) + (simple-backtracking-continuation `#F))))))) + +(define (compile-parser-expression expression pointers if-succeed if-fail) + (cond ((and (pair? expression) + (symbol? (car expression)) + (list? (cdr expression)) + (hash-table/get parser-compilers (car expression) #f)) + => (lambda (entry) + (let ((arity (car entry)) + (compiler (cdr entry))) + (if (and arity (not (= (length (cdr expression)) arity))) + (error "Incorrect arity for parser:" expression)) + (apply compiler pointers if-succeed if-fail + (if arity + (cdr expression) + (list (cdr expression))))))) + ((symbol? expression) + (handle-pending-backtracking pointers + (lambda (pointers) + (with-variable-binding `(,expression ,*buffer-name*) + (lambda (result) + `(IF ,result + ,(if-succeed (unknown-location pointers) result) + ,(if-fail pointers))))))) + (else + (error "Malformed matcher:" expression)))) + +;;;; Canonicalization + +(define (with-canonical-parser-expression expression receiver) + (let ((bindings '())) + (define (do-expression expression) + (cond ((and (pair? expression) + (symbol? (car expression)) + (list? (cdr expression))) + (case (car expression) + ((ALT SEQ) + `(,(car expression) + ,@(flatten-expressions (map do-expression (cdr expression)) + (car expression)))) + ((* COMPLETE TOP-LEVEL) + `(,(car expression) + ,(do-expression (check-1-arg expression)))) + ((+) + (do-expression + (let ((expression (check-1-arg expression))) + `(SEQ ,expression (* ,expression))))) + ((?) + (do-expression + `(ALT ,(check-1-arg expression) (SEQ)))) + ((MATCH NOISE) + (check-1-arg expression) + expression) + ((DEFAULT TRANSFORM ELEMENT-TRANSFORM ENCAPSULATE) + (check-2-args expression) + `(,(car expression) ,(cadr expression) + ,(do-expression (caddr expression)))) + ((PARSER) + (let ((expression (check-1-arg expression))) + (if (symbol? expression) + expression + (let loop ((bindings* bindings)) + (if (pair? bindings*) + (if (equal? expression (caar bindings*)) + (cdar bindings*) + (loop (cdr bindings*))) + (let ((variable (generate-uninterned-symbol))) + (set! bindings + (cons (cons expression variable) bindings)) + variable)))))) + (else + (error "Unknown parser expression:" expression)))) + ((symbol? expression) + expression) + (else + (error "Unknown parser expression:" expression)))) + + (define (check-1-arg expression) + (if (and (pair? (cdr expression)) + (null? (cddr expression))) + (cadr expression) + (error "Malformed expression:" expression))) + + (define (check-2-args expression) + (if (not (and (pair? (cdr expression)) + (pair? (cddr expression)) + (null? (cdddr expression)))) + (error "Malformed expression:" expression))) + + (let ((expression (do-expression expression))) + (if (pair? bindings) + `(LET ,(map (lambda (b) `(,(cdr b) ,(car b))) bindings) + ,(receiver expression)) + (receiver expression))))) + +;;;; Parsers + +(define-macro (define-parser form compiler-body) + (let ((name (car form)) + (parameters (cdr form))) + (if (symbol? parameters) + `(DEFINE-PARSER-COMPILER ',name #F + (LAMBDA (POINTERS IF-SUCCEED IF-FAIL ,parameters) + ,compiler-body)) + `(DEFINE-PARSER-COMPILER ',name ,(length parameters) + (LAMBDA (POINTERS IF-SUCCEED IF-FAIL ,@parameters) + ,compiler-body))))) + +(define (define-parser-compiler keyword arity compiler) + (hash-table/put! parser-compilers keyword (cons arity compiler)) + keyword) + +(define parser-compilers + (make-eq-hash-table)) + +(define-parser (match matcher) + (with-current-pointer pointers + (lambda (start-pointers) + (compile-matcher matcher start-pointers + (lambda (pointers) + (if-succeed pointers + `(VECTOR (GET-PARSER-BUFFER-TAIL + ,*buffer-name* + ,(current-pointer start-pointers))))) + if-fail)))) + +(define-parser (noise matcher) + (compile-matcher matcher pointers + (lambda (pointers) (if-succeed pointers `(VECTOR))) + if-fail)) + +(define-parser (default value parser) + (compile-parser-expression parser pointers if-succeed + (lambda (pointers) + (if-succeed pointers `(VECTOR ,value))))) + +(define-parser (transform transform parser) + (with-current-pointer pointers + (lambda (start-pointers) + (compile-parser-expression parser start-pointers + (lambda (pointers result) + (with-variable-binding `(,transform ,result) + (lambda (result) + `(IF ,result + ,(if-succeed pointers result) + ,(if-fail + (new-backtrack-pointer start-pointers pointers)))))) + if-fail)))) + +(define-parser (element-transform transform parser) + (compile-parser-expression parser pointers + (lambda (pointers result) + (if-succeed pointers `(VECTOR-MAP ,transform ,result))) + if-fail)) + +(define-parser (encapsulate transform parser) + (compile-parser-expression parser pointers + (lambda (pointers result) + (if-succeed pointers `(VECTOR (,transform ,result)))) + if-fail)) + +(define-parser (complete parser) + (with-current-pointer pointers + (lambda (start-pointers) + (compile-parser-expression parser start-pointers + (lambda (pointers result) + `(IF (PEEK-PARSER-BUFFER-CHAR ,*buffer-name*) + ,(if-fail (new-backtrack-pointer start-pointers pointers)) + (BEGIN + (DISCARD-PARSER-BUFFER-HEAD! ,*buffer-name*) + ,result))) + if-fail)))) + +(define-parser (top-level parser) + (compile-parser-expression parser pointers + (lambda (pointers result) + `(BEGIN + (DISCARD-PARSER-BUFFER-HEAD! ,*buffer-name*) + ,(if-succeed pointers result))) + if-fail)) + +(define-parser (seq . ps) + (if (pair? ps) + (if (pair? (cdr ps)) + (with-current-pointer pointers + (lambda (start-pointers) + (let loop ((ps ps) (pointers start-pointers) (results '())) + (compile-parser-expression (car ps) start-pointers + (lambda (pointers result) + (let ((results (cons result results))) + (if (pair? (cdr ps)) + (loop (cdr ps) pointers results) + (if-succeed pointers + `(VECTOR-APPEND ,@(reverse results)))))) + (lambda (pointers) + (if-fail + (new-backtrack-pointer start-pointers pointers))))))) + (compile-parser-expression (car ps) pointers if-succeed if-fail)) + (if-succeed pointers `(VECTOR)))) + +(define-parser (alt . ps) + (with-current-pointer pointers + (lambda (pointers) + (with-variable-binding + `(OR ,@(map (lambda (p) + (compile-parser-expression p pointers + (lambda (pointers result) + (handle-pending-backtracking pointers + (lambda (pointers) + pointers + result))) + (simple-backtracking-continuation `#F))) + ps)) + (lambda (result) + `(IF ,result + ,(if-succeed (unknown-location pointers) result) + ,(if-fail pointers))))))) + +(define-parser (* parser) + (handle-pending-backtracking pointers + (lambda (pointers) + (if-succeed + (unknown-location pointers) + (let ((loop (generate-uninterned-symbol)) + (elements (generate-uninterned-symbol))) + `(LET ,loop ((,elements (VECTOR))) + ,(compile-parser-expression parser (no-pointers) + (lambda (pointers element) + (handle-pending-backtracking pointers + (lambda (pointers) + pointers + `(,loop (VECTOR-APPEND ,elements ,element))))) + (lambda (pointers) + (handle-pending-backtracking pointers + (lambda (pointers) + pointers + elements)))))))))) + +;;; Edwin Variables: +;;; Eval: (scheme-indent-method 'handle-pending-backtracking 1) +;;; Eval: (scheme-indent-method 'with-buffer-name 0) +;;; End: diff --git a/v7/src/star-parser/shared.scm b/v7/src/star-parser/shared.scm new file mode 100644 index 000000000..ab9f8dcb5 --- /dev/null +++ b/v7/src/star-parser/shared.scm @@ -0,0 +1,330 @@ +;;; -*-Scheme-*- +;;; +;;; $Id: shared.scm,v 1.1 2001/06/26 18:03:22 cph Exp $ +;;; +;;; Copyright (c) 2001 Massachusetts Institute of Technology +;;; +;;; This program is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation; either version 2 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +;;; 02111-1307, USA. + +;;;; Shared code for matchers and parsers + +(declare (usual-integrations)) + +(define (with-buffer-name thunk) + (let ((v (generate-uninterned-symbol))) + `(LAMBDA (,v) + ,(fluid-let ((*buffer-name* v)) + (thunk))))) + +(define *buffer-name*) + +(define (with-variable-bindings expressions receiver) + (let ((variables + (map (lambda (x) x (generate-uninterned-symbol)) expressions))) + `(LET ,(map list variables expressions) + ,(apply receiver variables)))) + +(define (with-variable-binding expression receiver) + (with-variable-bindings (list expression) receiver)) + +;;;; Buffer pointers + +(define (no-pointers) + ;; Initial pointer set, used only when we know nothing about the + ;; context that an expression is expanding in. + (cons #f #f)) + +(define (with-current-pointer pointers procedure) + ;; Get a pointer to the current position, if any. This is called + ;; wherever we potentially need a pointer reference. But we track + ;; usage of the pointer, so that we only generate calls to + ;; GET-PARSER-BUFFER-POINTER when the pointer is used. + (if (car pointers) + (procedure pointers) + (let ((v.u (cons (generate-uninterned-symbol) #f))) + (let ((x (procedure (cons v.u (cdr pointers))))) + (if (cdr v.u) + `(LET ((,(car v.u) (GET-PARSER-BUFFER-POINTER ,*buffer-name*))) + ,x) + x))))) + +(define (current-pointer pointers) + (if (not (car pointers)) + (error "Missing required current pointer:" pointers)) + (set-cdr! (car pointers) #t) + (car (car pointers))) + +(define (unknown-location pointers) + ;; Discard the pointer to the current position, if any. Used after + ;; successful matching operations that modify the buffer position. + (cons #f (cdr pointers))) + +(define (new-backtrack-pointer backtrack-pointers pointers) + ;; Specify that we want to backtrack to the position specified in + ;; BACKTRACK-POINTERS. But don't actually change the position yet. + ;; Instead delay the move until it's actually needed. Without the + ;; delay, we can generate multiple sequential calls to change the + ;; position, which is wasteful since only the last call in the + ;; sequence is meaningful. + (cons (car pointers) (car backtrack-pointers))) + +(define (handle-pending-backtracking pointers procedure) + ;; Perform a pending backtracking operation, if any. + (if (and (cdr pointers) + (not (eq? (car pointers) (cdr pointers)))) + (begin + (set-cdr! (cdr pointers) #t) + `(BEGIN + (SET-PARSER-BUFFER-POINTER! ,*buffer-name* ,(car (cdr pointers))) + ,(procedure (cons (cdr pointers) #f)))) + (procedure (cons (car pointers) #f)))) + +(define (simple-backtracking-continuation value) + (lambda (pointers) + (handle-pending-backtracking pointers + (lambda (pointers) + pointers + value)))) + +;;;; Code optimizer + +(define (optimize-expression expression) + (let loop ((entries optimizer-patterns)) + (cond ((pair? entries) + (if (and (syntax-match? (caar entries) expression) + (or (not (cadar entries)) + ((cadar entries) expression))) + (let ((expression* ((cddar entries) expression))) + (if (equal? expression* expression) + expression + (optimize-expression expression*))) + (loop (cdr entries)))) + ((and (pair? expression) + (symbol? (car expression))) + (let ((expression* + (cons (car expression) + (map optimize-expression (cdr expression))))) + (if (equal? expression* expression) + expression + (optimize-expression expression*)))) + (else expression)))) + +(define (define-optimizer pattern predicate optimizer) + (let ((entry (assoc pattern optimizer-patterns)) + (datum (cons predicate optimizer))) + (if entry + (set-cdr! entry datum) + (begin + (set! optimizer-patterns + (cons (cons pattern datum) optimizer-patterns)) + unspecific)))) + +(define optimizer-patterns + '()) + +(define (predicate-not-or expression) + (not (and (pair? (cadr expression)) + (eq? (caadr expression) 'OR)))) + +(define-optimizer '('IF EXPRESSION #T #F) predicate-not-or + (lambda (expression) + (cadr expression))) + +(define-optimizer '('IF EXPRESSION #F #T) predicate-not-or + (lambda (expression) + `(NOT ,(cadr expression)))) + +(define-optimizer '('IF EXPRESSION EXPRESSION #F) + (lambda (expression) + (not (eq? (caddr expression) '#T))) + (lambda (expression) + `(AND ,(cadr expression) ,(caddr expression)))) + +(define-optimizer '('IF EXPRESSION #F EXPRESSION) + (lambda (expression) + (not (eq? (cadddr expression) '#T))) + (lambda (expression) + `(AND (NOT ,(cadr expression)) ,(cadddr expression)))) + +(define-optimizer '('IF EXPRESSION EXPRESSION EXPRESSION) + (lambda (expression) + (equal? (caddr expression) (cadddr expression))) + (lambda (expression) + `(BEGIN + ,(cadr expression) + ,(caddr expression)))) + +(define-optimizer '('IF EXPRESSION EXPRESSION 'UNSPECIFIC) #f + (lambda (expression) + `(IF ,(cadr expression) ,(caddr expression)))) + +(define-optimizer '('IF EXPRESSION EXPRESSION) + (lambda (expression) + (and (eq? (caddr expression) 'UNSPECIFIC) + (predicate-not-or expression))) + (lambda (expression) + (cadr expression))) + +(define-optimizer '('IF EXPRESSION + ('IF EXPRESSION EXPRESSION EXPRESSION) + EXPRESSION) + (lambda (expression) + (equal? (cadddr (caddr expression)) + (cadddr expression))) + (lambda (expression) + `(IF (AND ,(cadr expression) ,(cadr (caddr expression))) + ,(caddr (caddr expression)) + ,(cadddr expression)))) + +(define-optimizer '('IF EXPRESSION + EXPRESSION + ('IF EXPRESSION EXPRESSION EXPRESSION)) + (lambda (expression) + (equal? (caddr (cadddr expression)) + (caddr expression))) + (lambda (expression) + `(IF (OR ,(cadr expression) ,(cadr (cadddr expression))) + ,(caddr expression) + ,(cadddr (cadddr expression))))) + +(define-optimizer '('IF EXPRESSION + ('BEGIN . (+ EXPRESSION)) + EXPRESSION) + (lambda (expression) + (let ((expression* (car (last-pair (caddr expression))))) + (and (syntax-match? '('IF EXPRESSION EXPRESSION EXPRESSION) + expression*) + (equal? (cadddr expression*) + (cadddr expression))))) + (lambda (expression) + (let ((expression* (car (last-pair (caddr expression))))) + `(IF (AND ,(cadr expression) + (BEGIN ,@(except-last-pair (cdr (caddr expression))) + ,(cadr expression*))) + ,(caddr expression*) + ,(cadddr expression))))) + +(define-optimizer '('IF EXPRESSION + EXPRESSION + ('BEGIN . (+ EXPRESSION))) + (lambda (expression) + (let ((expression* (car (last-pair (cadddr expression))))) + (and (syntax-match? '('IF EXPRESSION EXPRESSION EXPRESSION) + expression*) + (equal? (caddr expression*) + (caddr expression))))) + (lambda (expression) + (let ((expression* (car (last-pair (cadddr expression))))) + `(IF (OR ,(cadr expression) + (BEGIN ,@(except-last-pair (cdr (cadddr expression))) + ,(cadr expression*))) + ,(caddr expression) + ,(cadddr expression*))))) + +(define-optimizer '('LET ((IDENTIFIER EXPRESSION)) + ('IF IDENTIFIER + IDENTIFIER + EXPRESSION)) + (lambda (expression) + (and (eq? (caar (cadr expression)) + (cadr (caddr expression))) + (eq? (caddr (caddr expression)) + (cadr (caddr expression))))) + (lambda (expression) + `(OR ,(cadar (cadr expression)) + ,(cadddr (caddr expression))))) + +(define-optimizer '('LAMBDA (* IDENTIFIER) EXPRESSION) #f + (lambda (expression) + `(LAMBDA ,(cadr expression) ,(optimize-expression (caddr expression))))) + +(define-optimizer '('LET IDENTIFIER (* (IDENTIFIER EXPRESSION)) EXPRESSION) + #f + (lambda (expression) + `(LET ,(cadr expression) + ,(map (lambda (b) (list (car b) (optimize-expression (cadr b)))) + (caddr expression)) + ,(optimize-expression (cadddr expression))))) + +(define-optimizer '('LET (* (IDENTIFIER EXPRESSION)) EXPRESSION) #f + (lambda (expression) + `(LET ,(map (lambda (b) (list (car b) (optimize-expression (cadr b)))) + (cadr expression)) + ,(optimize-expression (caddr expression))))) + +(define-optimizer '('VECTOR-MAP EXPRESSION ('VECTOR EXPRESSION)) #f + (lambda (expression) + `(VECTOR (,(cadr expression) ,(cadr (caddr expression)))))) + +(define-optimizer '('VECTOR-MAP IDENTIFIER ('VECTOR . (* EXPRESSION))) #f + (lambda (expression) + `(VECTOR + ,@(map (lambda (subexpression) + `(,(cadr expression) ,subexpression)) + (cdr (caddr expression)))))) + +(define-optimizer '('NOT EXPRESSION) #f + (lambda (expression) + `(NOT ,(optimize-expression (cadr expression))))) + +(define-optimizer '('VECTOR-APPEND . (* EXPRESSION)) #f + (lambda (expression) + (let ((expressions + (delete '(VECTOR) + (map optimize-expression + (flatten-subexpressions expression))))) + (if (pair? expressions) + (if (pair? (cdr expressions)) + `(,(car expression) ,@expressions) + (car expressions)) + `(VECTOR))))) + +(define-optimizer '('AND . (* EXPRESSION)) #f + (lambda (expression) + (optimize-group-expression expression '#T))) + +(define-optimizer '('OR . (* EXPRESSION)) #f + (lambda (expression) + (optimize-group-expression expression '#F))) + +(define-optimizer '('BEGIN . (+ EXPRESSION)) #f + (lambda (expression) + (optimize-group-expression expression 'UNSPECIFIC))) + +(define (optimize-group-expression expression identity) + (let loop + ((expressions + (map optimize-expression (flatten-subexpressions expression)))) + (cond ((not (pair? expressions)) + identity) + ((equal? (car (last-pair expressions)) identity) + (loop (except-last-pair! expressions))) + ((pair? (cdr expressions)) + `(,(car expression) ,@expressions)) + (else + (car expressions))))) + +(define (flatten-subexpressions expression) + (flatten-expressions (cdr expression) (car expression))) + +(define (flatten-expressions expressions keyword) + (let loop ((expressions expressions)) + (if (pair? expressions) + (if (and (pair? (car expressions)) + (eq? (caar expressions) keyword)) + (loop (append (cdar expressions) (cdr expressions))) + (cons (car expressions) (loop (cdr expressions)))) + '()))) \ No newline at end of file diff --git a/v7/src/star-parser/synchk.scm b/v7/src/star-parser/synchk.scm new file mode 100644 index 000000000..95b59251a --- /dev/null +++ b/v7/src/star-parser/synchk.scm @@ -0,0 +1,76 @@ +;;; -*-Scheme-*- +;;; +;;; $Id: synchk.scm,v 1.1 2001/06/26 18:03:24 cph Exp $ +;;; +;;; Copyright (c) 1989 Massachusetts Institute of Technology +;;; +;;; This program is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation; either version 2 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +;;; 02111-1307, USA. + +;;;; Syntax Checking +;;; written by Alan Bawden +;;; modified by Chris Hanson + +(declare (usual-integrations)) + +(define (syntax-match? pattern object) + (let ((match-error (lambda () (error "ill-formed pattern" pattern)))) + (cond ((symbol? pattern) + (case pattern + ((IDENTIFIER) (symbol? object)) + ((ANYTHING EXPRESSION FORM) true) + ((BVL) (lambda-pattern? object)) + (else (match-error)))) + ((pair? pattern) + (case (car pattern) + ((QUOTE) + (if (and (pair? (cdr pattern)) + (null? (cddr pattern))) + (eqv? (cadr pattern) object) + (match-error))) + ((*) + (if (pair? (cdr pattern)) + (let ((head (cadr pattern)) + (tail (cddr pattern))) + (let loop ((object object)) + (or (and (pair? object) + (syntax-match? head (car object)) + (loop (cdr object))) + (syntax-match? tail object)))) + (match-error))) + ((+) + (if (pair? (cdr pattern)) + (let ((head (cadr pattern)) + (tail (cddr pattern))) + (and (pair? object) + (syntax-match? head (car object)) + (let loop ((object (cdr object))) + (or (and (pair? object) + (syntax-match? head (car object)) + (loop (cdr object))) + (syntax-match? tail object))))) + (match-error))) + ((?) + (if (pair? (cdr pattern)) + (or (and (syntax-match? (cadr pattern) (car object)) + (syntax-match? (cddr pattern) (cdr object))) + (syntax-match? (cddr pattern) object)) + (match-error))) + (else + (and (pair? object) + (syntax-match? (car pattern) (car object)) + (syntax-match? (cdr pattern) (cdr object)))))) + (else + (eqv? pattern object))))) \ No newline at end of file diff --git a/v7/src/star-parser/test-parser.scm b/v7/src/star-parser/test-parser.scm new file mode 100644 index 000000000..6be4957dc --- /dev/null +++ b/v7/src/star-parser/test-parser.scm @@ -0,0 +1,144 @@ +;;; -*-Scheme-*- +;;; +;;; $Id: test-parser.scm,v 1.1 2001/06/26 18:03:26 cph Exp $ +;;; +;;; Copyright (c) 2001 Massachusetts Institute of Technology +;;; +;;; This program is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation; either version 2 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +;;; 02111-1307, USA. + +(define (test-matcher matcher string) + (let ((buffer (string->parser-buffer string))) + (and (matcher buffer) + (if (peek-parser-buffer-char buffer) + (get-parser-buffer-pointer buffer) + #t)))) + +(define (test-parser parser string) + (let ((buffer (string->parser-buffer string))) + (let ((v (parser buffer))) + (and v + (if (peek-parser-buffer-char buffer) + (begin + (write-string "Lose: ") + (write (get-parser-buffer-pointer buffer)) + (newline) + #f) + v))))) + +(define parse-list + (*parser + (encapsulate vector->list + (seq (noise (string "(")) + (noise (* (alphabet char-set:whitespace))) + (? (seq parse-element + (* (seq (noise (+ (alphabet char-set:whitespace))) + parse-element)))) + (noise (* (alphabet char-set:whitespace))) + (noise (string ")")))))) + +(define parse-element + (*parser (alt parse-identifier parse-num-10 parse-list))) + +(define parse-identifier + (*parser (element-transform intern (match match-identifier)))) + +(define parse-num-10 + (*parser (element-transform string->number (match match-num-10)))) + +(define parse-whitespace + (*parser (noise (+ (alphabet char-set:whitespace))))) + +(define parse-optional-whitespace + (*parser (noise (* (alphabet char-set:whitespace))))) + +(define match-identifier + (let* ((initial-alphabet + (char-set-union char-set:alphabetic + (string->char-set "!$%&*/:<=>?^_~"))) + (subsequent-alphabet + (char-set-union initial-alphabet + char-set:numeric + (string->char-set "+-.@")))) + (*matcher + (alt (seq (alphabet initial-alphabet) + (* (alphabet subsequent-alphabet))) + (string "+") + (string "-") + (string "..."))))) + +(define match-num-10 + (*matcher + (seq (? (alt (seq (string-ci "#d") + (? (alt (string-ci "#i") + (string-ci "#e")))) + (seq (alt (string-ci "#i") + (string-ci "#e")) + (? (string-ci "#d"))))) + match-complex-10))) + +(define match-complex-10 + (*matcher + (alt (seq match-ureal-10 + (? (alt match-angle-10 + match-imaginary-10))) + (seq (alphabet (string->char-set "+-")) + (alt (seq match-ureal-10 + (? (alt match-angle-10 + match-imaginary-10 + (string-ci "i")))) + (string-ci "i")))))) + +(define match-angle-10 + (*matcher + (seq (string "@") + (? (alphabet (string->char-set "+-"))) + match-ureal-10))) + +(define match-imaginary-10 + (*matcher + (seq (alphabet (string->char-set "+-")) + (? match-ureal-10) + (string-ci "i")))) + +(define match-ureal-10 + (*matcher + (alt (seq (+ (alphabet char-set:numeric)) + (? (alt (seq (string ".") + (* (alphabet char-set:numeric)) + (* (string "#")) + (? match-exponent-10)) + (seq (string "/") + (+ (alphabet char-set:numeric)) + (* (string "#"))) + (seq (+ (string "#")) + (? (alt (seq (string ".") + (* (string "#")) + (? match-exponent-10)) + (seq (string "/") + (+ (alphabet char-set:numeric)) + (* (string "#"))) + match-exponent-10))) + match-exponent-10))) + (seq (string ".") + (+ (alphabet char-set:numeric)) + (* (string "#")) + (? match-exponent-10))))) + +(define match-exponent-10 + (*matcher + (seq (alphabet (string->char-set "esfdlESFDL")) + (? (alphabet (string->char-set "+-"))) + (+ (alphabet char-set:numeric))))) \ No newline at end of file