From bc9b6a558f47d1ca43ac55e938eb28184f9b1d68 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Fri, 6 Jul 2001 20:50:49 +0000 Subject: [PATCH] Package the code. Move parser macros to their own file so that they can be loaded during compilation. Restructure the top-level parser to allow passing the DTD down to the element parser for entity expansion. Implement attribute-value normalization. Add restrictions required for rejection of non-wf documents: no "--" in comments; no duplicate attribute names. Change names of DTD structures to reflect their keywords. Fix various minor bugs, discovered either by the test suite or by the compiler. --- v7/src/xml/compile.scm | 33 ++ v7/src/xml/ed-ffi.scm | 26 ++ v7/src/xml/load.scm | 24 ++ v7/src/xml/parser-macro.scm | 58 ++++ v7/src/xml/xml-parser.scm | 602 ++++++++++++++++++------------------ v7/src/xml/xml-struct.scm | 51 +-- v7/src/xml/xml.pkg | 147 +++++++++ 7 files changed, 623 insertions(+), 318 deletions(-) create mode 100644 v7/src/xml/compile.scm create mode 100644 v7/src/xml/ed-ffi.scm create mode 100644 v7/src/xml/load.scm create mode 100644 v7/src/xml/parser-macro.scm create mode 100644 v7/src/xml/xml.pkg diff --git a/v7/src/xml/compile.scm b/v7/src/xml/compile.scm new file mode 100644 index 000000000..92ede4875 --- /dev/null +++ b/v7/src/xml/compile.scm @@ -0,0 +1,33 @@ +;;; -*-Scheme-*- +;;; +;;; $Id: compile.scm,v 1.1 2001/07/06 20:50:37 cph Exp $ +;;; +;;; Copyright (c) 2001 Massachusetts Institute of Technology +;;; +;;; This program is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation; either version 2 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +;;; 02111-1307, USA. + +(load-option 'CREF) +(with-working-directory-pathname (directory-pathname (current-load-pathname)) + (lambda () + (load "parser-macro") + (for-each compile-file + '("xml-struct" + "xml-parser" + ;;"xml-output" + )) + (cref/generate-constructors "xml") + (sf "xml.con") + (sf "xml.ldr"))) \ No newline at end of file diff --git a/v7/src/xml/ed-ffi.scm b/v7/src/xml/ed-ffi.scm new file mode 100644 index 000000000..0e0b4b8ec --- /dev/null +++ b/v7/src/xml/ed-ffi.scm @@ -0,0 +1,26 @@ +;;; -*-Scheme-*- +;;; +;;; $Id: ed-ffi.scm,v 1.1 2001/07/06 20:50:39 cph Exp $ +;;; +;;; Copyright (c) 2001 Massachusetts Institute of Technology +;;; +;;; This program is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation; either version 2 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +;;; 02111-1307, USA. + +;;;; XML: Edwin buffer packaging info + +(standard-scheme-find-file-initialization + '#(("xml-struct" (runtime xml structure) system-global-syntax-table) + ("xml-parser" (runtime xml parser) system-global-syntax-table))) \ No newline at end of file diff --git a/v7/src/xml/load.scm b/v7/src/xml/load.scm new file mode 100644 index 000000000..b174440f9 --- /dev/null +++ b/v7/src/xml/load.scm @@ -0,0 +1,24 @@ +;;; -*-Scheme-*- +;;; +;;; $Id: load.scm,v 1.1 2001/07/06 20:50:41 cph Exp $ +;;; +;;; Copyright (c) 2001 Massachusetts Institute of Technology +;;; +;;; This program is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation; either version 2 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +;;; 02111-1307, USA. + +(with-working-directory-pathname (directory-pathname (current-load-pathname)) + (lambda () + (package/system-loader "xml" '() 'QUERY))) \ No newline at end of file diff --git a/v7/src/xml/parser-macro.scm b/v7/src/xml/parser-macro.scm new file mode 100644 index 000000000..6fc9c9d60 --- /dev/null +++ b/v7/src/xml/parser-macro.scm @@ -0,0 +1,58 @@ +;;; -*-Scheme-*- +;;; +;;; $Id: parser-macro.scm,v 1.1 2001/07/06 20:50:43 cph Exp $ +;;; +;;; Copyright (c) 2001 Massachusetts Institute of Technology +;;; +;;; This program is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation; either version 2 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +;;; 02111-1307, USA. + +;;;; XML parser macros + +(declare (usual-integrations)) + +(define-*parser-macro S ;[3] + `(NOISE (+ (ALPHABET CHAR-SET:XML-WHITESPACE)))) + +(define-*parser-macro S? + `(NOISE (* (ALPHABET CHAR-SET:XML-WHITESPACE)))) + +(define-*parser-macro (bracket description open close . body) + (let ((v (generate-uninterned-symbol))) + `(WITH-POINTER ,v + (SEQ ,open + ,@body + (ALT ,close + (SEXP + (LAMBDA (BUFFER) + BUFFER + (ERROR + ,(if (string? description) + (string-append "Unterminated " description " at") + `(STRING-APPEND "Unterminated " ,description " at")) + (PARSER-BUFFER-POSITION-STRING ,v))))))))) + +(define-*parser-macro (sbracket description open close . body) + `(BRACKET ,description (NOISE (STRING ,open)) (NOISE (STRING ,close)) + ,@body)) + +(define-*parser-macro (require-success message body) + `(ALT ,body + (SEXP + (LAMBDA (BUFFER) + (ERROR ,(if (string? message) + (string-append message " at") + `(STRING-APPEND ,message " at")) + (PARSER-BUFFER-POSITION-STRING BUFFER)))))) \ No newline at end of file diff --git a/v7/src/xml/xml-parser.scm b/v7/src/xml/xml-parser.scm index 249509694..4d193230a 100644 --- a/v7/src/xml/xml-parser.scm +++ b/v7/src/xml/xml-parser.scm @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: xml-parser.scm,v 1.2 2001/07/05 20:47:41 cph Exp $ +;;; $Id: xml-parser.scm,v 1.3 2001/07/06 20:50:47 cph Exp $ ;;; ;;; Copyright (c) 2001 Massachusetts Institute of Technology ;;; @@ -25,9 +25,6 @@ ;;; standard, second edition, 6 October 2000. Each such comment marks ;;; the code that corresponds to that rule. -;;; **** TO DO **** -;;; * Attribute-value normalization (p. 29). - (declare (usual-integrations)) ;;;; Utilities @@ -42,169 +39,117 @@ (define char-set:char-data (char-set-difference char-set:xml-char (char-set #\< #\&))) -(define-*parser-macro S ;[3] - `(NOISE (+ (ALPHABET CHAR-SET:XML-WHITESPACE)))) - -(define-*parser-macro S? - `(NOISE (* (ALPHABET CHAR-SET:XML-WHITESPACE)))) - (define char-set:xml-whitespace (char-set #\space #\tab #\return #\linefeed)) -(define-*parser-macro (bracket description open close . body) - (let ((v (generate-uninterned-symbol))) - `(WITH-POINTER ,v - (SEQ ,open - ,@body - (ALT ,close - (SEXP - (LAMBDA (BUFFER) - BUFFER - (ERROR - ,(if (string? description) - (string-append "Unterminated " description " at") - `(STRING-APPEND "Unterminated " ,description " at")) - (PARSER-BUFFER-POSITION-STRING ,v))))))))) - -(define-*parser-macro (sbracket description open close . body) - `(BRACKET ,description (NOISE (STRING ,open)) (NOISE (STRING ,close)) - ,@body)) - -(define-*parser-macro (require-success message body) - `(ALT ,body - (SEXP - (LAMBDA (BUFFER) - (ERROR ,(if (string? message) - (string-append message " at") - `(STRING-APPEND ,message " at")) - (PARSER-BUFFER-POSITION-STRING BUFFER)))))) +(define (coalesce-strings! elements) + (do ((elements elements (cdr elements))) + ((not (pair? elements))) + (if (and (string? (car elements)) + (pair? (cdr elements)) + (string? (cadr elements))) + (begin + (set-car! elements + (string-append (car elements) + (cadr elements))) + (set-cdr! elements (cddr elements)))))) (define (make-xml-char-reference n) - (if (not (or (= n #x9) - (= n #xA) - (= n #xD) - (<= #x20 n #xD7FF) - (<= #xE000 n #xFFFD) - (<= #x10000 n #x10FFFF))) + (if (not (valid-xml-code-point? n)) (error "Disallowed Unicode character code:" n)) (integer->unicode-string n)) +(define (valid-xml-code-point? n) + (and (< n #x110000) + (if (< n #xD800) + (or (>= n #x20) + (= n #x9) + (= n #xA) + (= n #xD)) + (and (>= n #xE000) + (not (or (= n #xFFFE) + (= n #xFFFF))))))) + (define (integer->unicode-string n) - (let ((initial-char - (lambda (n offset) - (integer->char - (fix:or (fix:and (fix:lsh #xFF (fix:+ n 1)) #xFF) - (fix:lsh n -6))))) - (subsequent-char - (lambda (offset) - (integer->char - (fix:or #x80 - (fix:and (fix:lsh n (fix:- 0 offset)) - #x3F)))))) - (declare (integrate-operator initial-char subsequent-char)) - (if (not (and (<= 0 n) (< n #x80000000))) - (error:bad-range-argument n 'INTEGER->UNICODE-STRING)) - (cond ((< n #x00000080) - (string (integer->char n))) - ((< n #x00000800) - (string (initial-char 5 6) - (subsequent-char 6))) - ((< n #x00010000) - (string (initial-char 4 12) - (subsequent-char 12) - (subsequent-char 6))) - ((< n #x00200000) - (string (initial-char 3 18) - (subsequent-char 18) - (subsequent-char 12) - (subsequent-char 6))) - ((< n #x04000000) - (string (initial-char 2 24) - (subsequent-char 24) - (subsequent-char 18) - (subsequent-char 12) - (subsequent-char 6))) - (else - (string (initial-char 1 30) - (subsequent-char 30) - (subsequent-char 24) - (subsequent-char 18) - (subsequent-char 12) - (subsequent-char 6)))))) - -(define (normalize-line-endings string) - (if (string-find-next-char string #\return) - (let ((end (string-length string))) - (let ((step-over-eol - (lambda (index) - (fix:+ index - (if (and (fix:< (fix:+ index 1) end) - (char=? (string-ref string (fix:+ index 1)) - #\linefeed)) - 2 - 1))))) - (let ((n - (let loop ((start 0) (n 0)) - (let ((index - (substring-find-next-char string start end - #\return))) - (if index - (loop (step-over-eol index) - (fix:+ n (fix:+ (fix:- index start) 1))) - (fix:+ n (fix:- end start))))))) - (let ((string* (make-string n))) - (let loop ((start 0) (start* 0)) - (let ((index - (substring-find-next-char string start end - #\return))) - (if index - (let ((start* - (substring-move! string start index - string* start*))) - (string-set! string* start* #\newline) - (loop (step-over-eol index) - (fix:+ start* 1))) - (substring-move! string start end string* start*)))) - string*)))) - string)) + + (define-integrable (initial-char n offset) + (integer->char + (fix:or (fix:and (fix:lsh #xFF (fix:+ n 1)) #xFF) + (fix:lsh n (fix:- 0 offset))))) + + (define-integrable (subsequent-char offset) + (integer->char + (fix:or #x80 + (fix:and (fix:lsh n (fix:- 0 offset)) #x3F)))) + + (if (not (and (<= 0 n) (< n #x80000000))) + (error:bad-range-argument n 'INTEGER->UNICODE-STRING)) + (cond ((< n #x00000080) + (string (integer->char n))) + ((< n #x00000800) + (string (initial-char 5 6) + (subsequent-char 6))) + ((< n #x00010000) + (string (initial-char 4 12) + (subsequent-char 12) + (subsequent-char 6))) + ((< n #x00200000) + (string (initial-char 3 18) + (subsequent-char 18) + (subsequent-char 12) + (subsequent-char 6))) + ((< n #x04000000) + (string (initial-char 2 24) + (subsequent-char 24) + (subsequent-char 18) + (subsequent-char 12) + (subsequent-char 6))) + (else + (string (initial-char 1 30) + (subsequent-char 30) + (subsequent-char 24) + (subsequent-char 18) + (subsequent-char 12) + (subsequent-char 6))))) ;;;; Top level -(define parse-xml-document ;[1,22] - (*parser - (transform - (lambda (v) - (make-xml-document (vector-ref v 0) - (vector-ref v 1) - (vector-ref v 2) - (vector-ref v 3) - (vector-ref v 4) - (vector-ref v 5))) - (complete - (seq (alt (top-level parse-declaration) - (values #f)) - (encapsulate vector->list - (* (top-level parse-misc))) - (alt (seq (top-level parse-dtd) - (encapsulate vector->list - (* (top-level parse-misc)))) - (values #f '())) - (top-level parse-element) - (encapsulate vector->list - (* (top-level parse-misc)))))))) +(define (parse-xml-document buffer) ;[1,22] + (let* ((declaration (parse-declaration buffer)) + (misc-1 (parse-misc buffer)) + (dtd (parse-dtd buffer)) + (misc-2 (if dtd (parse-misc buffer) '())) + (element + (fluid-let ((*dtd* dtd)) + (parse-root-element buffer))) + (misc-3 (parse-misc buffer))) + (if (peek-parser-buffer-char buffer) + (error "Unparsed content in input at" + (parser-buffer-position-string buffer))) + (make-xml-document declaration + misc-1 + dtd + misc-2 + element + misc-3))) + +(define *dtd*) (define parse-misc ;[27] (*parser - (alt parse-comment - parse-processing-instructions - (element-transform normalize-line-endings - (match (+ (alphabet char-set:xml-whitespace))))))) + (encapsulate vector->list + (* (top-level + (alt parse-comment + parse-processing-instructions + (element-transform normalize-line-endings + (match (+ (alphabet char-set:xml-whitespace)))))))))) (define parse-declaration ;[23,24,32,80] (*parser - (encapsulate (lambda (v) (transform-declaration (vector-ref v 0))) - (sbracket "XML declaration" "" - parse-attribute-list)))) + (top-level + (transform (lambda (v) (transform-declaration (vector-ref v 0))) + (sbracket "XML declaration" "" + parse-attribute-list))))) (define (transform-declaration attributes) (let ((finish @@ -257,6 +202,9 @@ ;;;; Elements +(define parse-root-element + (*parser (top-level parse-element))) + (define (parse-element buffer) ;[39] (let ((p (get-parser-buffer-pointer buffer))) (let ((v (parse-start-tag buffer))) @@ -286,16 +234,18 @@ (define parse-start-tag ;[40,44] (*parser - (bracket "start tag" - (seq (noise (string "<")) maybe-parse-name) - (match (alt (string ">") (string "/>"))) - parse-attribute-list))) + (top-level + (bracket "start tag" + (seq (noise (string "<")) maybe-parse-name) + (match (alt (string ">") (string "/>"))) + parse-attribute-list)))) (define parse-end-tag ;[42] (*parser - (sbracket "end tag" "" - parse-name - S?))) + (top-level + (sbracket "end tag" "" + parse-name + S?)))) (define parse-content ;[43] (*parser @@ -320,6 +270,7 @@ (*parser (sbracket description start end parser)))) (define (terminated-region-parser description alphabet . ends) + description (let ((matcher (lambda (buffer) (let loop () @@ -336,8 +287,12 @@ (terminated-region-parser "character data" char-set:char-data "]]>")) (define parse-comment ;[15] - (let ((parser (bracketed-region-parser "comment" ""))) - (*parser (element-transform make-xml-comment parser)))) + (let ((parse-body + (terminated-region-parser "comment" char-set:xml-char "--"))) + (*parser + (element-transform make-xml-comment + (sbracket "comment" "" + parse-body))))) (define parse-cdata-section ;[18,19,20,21] (bracketed-region-parser "CDATA section" "")) @@ -367,7 +322,7 @@ ;;;; Names and references (define parse-name - (*parser (require-success "malformed XML name" maybe-parse-name))) + (*parser (require-success "Malformed XML name" maybe-parse-name))) (define maybe-parse-name ;[5] (*parser @@ -377,7 +332,7 @@ (define parse-name-token (*parser - (require-success "malformed XML name token" + (require-success "Malformed XML name token" maybe-parse-name-token))) (define maybe-parse-name-token ;[7] @@ -432,9 +387,19 @@ (define parse-attribute-list (*parser - (encapsulate vector->list - (seq (* parse-attribute) - S?)))) + (with-pointer p + (encapsulate + (lambda (v) + (let ((alist (vector->list v))) + (do ((alist alist (cdr alist))) + ((not (pair? alist))) + (let ((entry (assq (caar alist) (cdr alist)))) + (if entry + (error "Duplicate entry in attribute list at" + (parser-buffer-position-string p))))) + alist)) + (seq (* parse-attribute) + S?))))) (define parse-attribute ;[41,25] (*parser @@ -442,7 +407,7 @@ (seq S maybe-parse-name S? - (require-success "missing attribute separator" + (require-success "Missing attribute separator" (noise (string "="))) S? parse-attribute-value)))) @@ -475,58 +440,105 @@ (define parse-attribute-value ;[10] (let ((parser (attribute-value-parser char-set:char-data parse-reference))) - (*parser (require-success "malformed attribute value" parser)))) + (*parser + (element-transform normalize-attribute-value + (require-success "Malformed attribute value" + parser))))) + +;;;; Normalization -(define (coalesce-strings! elements) - (do ((elements elements (cdr elements))) - ((not (pair? elements))) - (if (and (string? (car elements)) - (pair? (cdr elements)) - (string? (cadr elements))) - (begin - (set-car! elements - (string-append (car elements) - (cadr elements))) - (set-cdr! elements (cddr elements)))))) +(define (normalize-line-endings string #!optional always-copy?) + (if (string-find-next-char string #\return) + (let ((end (string-length string))) + (let ((step-over-eol + (lambda (index) + (fix:+ index + (if (and (fix:< (fix:+ index 1) end) + (char=? (string-ref string (fix:+ index 1)) + #\linefeed)) + 2 + 1))))) + (let ((n + (let loop ((start 0) (n 0)) + (let ((index + (substring-find-next-char string start end + #\return))) + (if index + (loop (step-over-eol index) + (fix:+ n (fix:+ (fix:- index start) 1))) + (fix:+ n (fix:- end start))))))) + (let ((string* (make-string n))) + (let loop ((start 0) (start* 0)) + (let ((index + (substring-find-next-char string start end + #\return))) + (if index + (let ((start* + (substring-move! string start index + string* start*))) + (string-set! string* start* #\newline) + (loop (step-over-eol index) + (fix:+ start* 1))) + (substring-move! string start end string* start*)))) + string*)))) + (if (and (not (default-object? always-copy?)) + always-copy?) + (string-copy string) + string))) + +(define (normalize-attribute-value value) + (cond ((pair? value) + (map normalize-attribute-value value)) + ((string? value) + (let ((string (normalize-line-endings value #t))) + (let ((n (string-length string))) + (do ((i 0 (fix:+ i 1))) + ((fix:= i n)) + (if (or (char=? (string-ref string i) #\tab) + (char=? (string-ref string i) #\newline)) + (string-set! string i #\space)))) + string)) + (else value))) ;;;; Document-type declarations (define parse-dtd ;[28] (*parser - (encapsulate - (lambda (v) - (make-xml-dtd (vector-ref v 0) - (vector-ref v 1) - (vector-ref v 2))) - (sbracket "document-type declaration" "" - (require-success "Malformed document type" - (seq S - parse-name - (alt (seq S - parse-external-id) - (values #f)) - S? - (alt (seq (encapsulate vector->list - (sbracket "internal DTD" "[" "]" - (* (alt parse-markup-decl - parse-decl-separator)))) - S?) - (values #f)))))))) - -(define parse-markup-decl ;[29] - (*parser - (alt parse-element-decl - parse-attlist-decl - parse-entity-decl - parse-notation-decl - parse-processing-instructions - parse-comment))) + (top-level + (encapsulate + (lambda (v) + (make-xml-dtd (vector-ref v 0) + (vector-ref v 1) + (vector-ref v 2))) + (sbracket "document-type declaration" "" + (require-success "Malformed document type" + (seq S + parse-name + (alt (seq S + parse-external-id) + (values #f)) + S? + (alt (seq (encapsulate vector->list + (sbracket "internal DTD" "[" "]" + (* (alt parse-markup-decl + parse-decl-separator)))) + S?) + (values #f))))))))) (define parse-decl-separator ;[28a] (*parser (alt parse-parameter-entity-reference S))) +(define parse-markup-decl ;[29] + (*parser + (alt parse-!element + parse-!attlist + parse-!entity + parse-!notation + parse-processing-instructions + parse-comment))) + (define parse-external-subset ;[30] (*parser (seq (? parse-text-decl) @@ -538,12 +550,12 @@ parse-conditional-section parse-decl-separator)))) -(define parse-element-decl ;[45] +(define parse-!element ;[45] (letrec ((parse-children ;[47,49,50] (*parser (encapsulate encapsulate-suffix - (seq (sbracket "element-declaration type" "(" ")" + (seq (sbracket "!ELEMENT type" "(" ")" S? (alt (encapsulate (lambda (v) (cons 'ALT (vector->list v))) (seq parse-cp @@ -576,9 +588,7 @@ (*parser (encapsulate - (lambda (v) - (make-xml-element-declaration (vector-ref v 0) - (vector-ref v 1))) + (lambda (v) (make-xml-!element (vector-ref v 0) (vector-ref v 1))) (sbracket "element declaration" "" S parse-name @@ -588,26 +598,30 @@ (element-transform xml-intern (match (string "ANY"))) ;;[51] (encapsulate (lambda (v) (cons 'MIX (vector->list v))) - (alt (sbracket "element-declaration type" "(" ")" + (with-pointer p + (seq (noise (string "(")) S? (noise (string "#PCDATA")) - S?) - (sbracket "element-declaration type" "(" ")*" - S? - (noise (string "#PCDATA")) - (* (seq S? - (noise (string "|")) - S? - parse-name)) - S?))) + (alt (seq S? + (noise (string ")"))) + (seq (* (seq S? + (noise (string "|")) + S? + parse-name)) + S? + (noise (string ")*"))) + + (sexp + (lambda (buffer) + buffer + (error "Unterminated !ELEMENT type at" + (parser-buffer-position-string p)))))))) parse-children)))))) -(define parse-attlist-decl ;[52,53] +(define parse-!attlist ;[52,53] (*parser (encapsulate - (lambda (v) - (make-xml-attribute-declaration (vector-ref v 0) - (vector-ref v 1))) + (lambda (v) (make-xml-!attlist (vector-ref v 0) (vector-ref v 1))) (sbracket "attribute-list declaration" "" S parse-name @@ -669,60 +683,14 @@ parse-attribute-value)))))) S?)))) -(define parse-conditional-section ;[61] - (*parser - (alt parse-include-section - parse-ignore-section))) - -(define-integrable conditional-start "") - -(define parse-include-section ;[62] - (*parser - (encapsulate - (lambda (v) - (make-xml-include-section (vector->list v))) - (bracket "include section" - (seq (noise (string conditional-start)) - S? - (noise (string "INCLUDE")) - S? - (noise (string "["))) - (noise (string conditional-end)) - parse-external-subset-decl)))) - -(define parse-ignore-section ;[63] - (*parser - (encapsulate - (lambda (v) - (make-xml-ignore-section (vector->list v))) - (bracket "ignore section" - (seq (noise (string conditional-start)) - S? - (noise (string "IGNORE")) - S? - (noise (string "["))) - (noise (string conditional-end)) - (* parse-ignore-section-contents))))) - -(define parse-ignore-section-contents ;[64,65] - (let ((parser - (terminated-region-parser "ignore section" char-set:xml-char - conditional-start conditional-end))) - (*parser - (seq parser - (* (seq (sbracket "ignore section" conditional-start conditional-end - parse-ignore-section-contents) - parser)))))) - -(define parse-entity-decl ;[70,71,72,73,74] +(define parse-!entity ;[70,71,72,73,74,76] (*parser (sbracket "entity declaration" "" (seq S (alt (encapsulate (lambda (v) - (make-xml-parameter-entity-declaration (vector-ref v 0) - (vector-ref v 1))) + (make-xml-parameter-!entity (vector-ref v 0) + (vector-ref v 1))) (seq (noise (string "%")) S parse-name @@ -731,15 +699,37 @@ parse-external-id))) (encapsulate (lambda (v) - (make-xml-entity-declaration (vector-ref v 0) - (vector-ref v 1))) + (if (fix:= (vector-length v) 2) + (make-xml-!entity (vector-ref v 0) (vector-ref v 1)) + (make-xml-unparsed-!entity (vector-ref v 0) + (vector-ref v 1) + (vector-ref v 2)))) (seq parse-name S (alt parse-entity-value (seq parse-external-id - (? parse-ndata-decl)))))) + (? (seq S + (noise (string "NDATA")) + S + parse-name))))))) S?)))) +(define parse-!notation ;[82,83] + (*parser + (encapsulate + (lambda (v) (make-xml-!notation (vector-ref v 0) (vector-ref v 1))) + (sbracket "notation declaration" "" + S + parse-name + S + (alt parse-external-id + (encapsulate + (lambda (v) (make-xml-external-id (vector-ref v 0) #f)) + (seq (noise (string "PUBLIC")) + S + parse-public-id-literal))) + S?)))) + (define parse-external-id ;[75] (*parser (alt (encapsulate @@ -764,36 +754,56 @@ (alt (sbracket description "\"" "\"" (match (* (alphabet a1)))) (sbracket description "'" "'" (match (* (alphabet a2)))))))) +(define parse-system-literal ;[11] + (string-parser "system literal" char-set:xml-char)) + (define parse-public-id-literal ;[12,13] (string-parser "public-ID literal" (char-set-union char-set:alphanumeric (string->char-set " \r\n-'()+,./:=?;!*#@$_%")))) - -(define parse-system-literal ;[11] - (string-parser "system literal" char-set:xml-char)) -(define parse-ndata-decl ;[76] +(define parse-conditional-section ;[61] (*parser - (seq S - (noise (string "NDATA")) - S - parse-name))) + (alt parse-!include + parse-!ignore))) + +(define-integrable conditional-start "") -(define parse-notation-decl ;[82,83] +(define parse-!include ;[62] (*parser - (sbracket "notation declaration" "" - S - parse-name - S - (alt parse-external-id - (encapsulate - (lambda (v) - (make-xml-external-id (vector-ref v 0) #f)) - (seq (noise (string "PUBLIC")) - S - parse-public-id-literal))) - S?))) + (encapsulate (lambda (v) (make-xml-!include (vector->list v))) + (bracket "include section" + (seq (noise (string conditional-start)) + S? + (noise (string "INCLUDE")) + S? + (noise (string "["))) + (noise (string conditional-end)) + parse-external-subset-decl)))) + +(define parse-!ignore ;[63] + (*parser + (encapsulate (lambda (v) (make-xml-!ignore (vector->list v))) + (bracket "ignore section" + (seq (noise (string conditional-start)) + S? + (noise (string "IGNORE")) + S? + (noise (string "["))) + (noise (string conditional-end)) + (* parse-!ignore-contents))))) + +(define parse-!ignore-contents ;[64,65] + (let ((parser + (terminated-region-parser "ignore section" char-set:xml-char + conditional-start conditional-end))) + (*parser + (seq parser + (* (seq (sbracket "ignore section" conditional-start conditional-end + parse-!ignore-contents) + parser)))))) ;;; Edwin Variables: ;;; Eval: (scheme-indent-method 'encapsulate 1) diff --git a/v7/src/xml/xml-struct.scm b/v7/src/xml/xml-struct.scm index 1f966a3a0..980a8fc3c 100644 --- a/v7/src/xml/xml-struct.scm +++ b/v7/src/xml/xml-struct.scm @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: xml-struct.scm,v 1.1 2001/07/05 20:47:53 cph Exp $ +;;; $Id: xml-struct.scm,v 1.2 2001/07/06 20:50:49 cph Exp $ ;;; ;;; Copyright (c) 2001 Massachusetts Institute of Technology ;;; @@ -107,56 +107,63 @@ id uri) -(define-structure (xml-element-declaration +(define-structure (xml-!element (print-procedure - (standard-unparser-method 'XML-ELEMENT-DECLARATION + (standard-unparser-method 'XML-!ELEMENT (lambda (element port) (write-char #\space port) - (write (xml-element-declaration-name element) port))))) + (write (xml-!element-name element) port))))) name content-type) -(define-structure (xml-attribute-declaration +(define-structure (xml-!attlist (print-procedure - (standard-unparser-method 'XML-ATTRIBUTE-DECLARATION + (standard-unparser-method 'XML-!ATTLIST (lambda (element port) (write-char #\space port) - (write (xml-attribute-declaration-name element) - port))))) + (write (xml-!attlist-name element) port))))) name definitions) -(define-structure xml-include-section +(define-structure xml-!include contents) -(define-structure xml-ignore-section +(define-structure xml-!ignore contents) -(define-structure (xml-entity-declaration +(define-structure (xml-!entity (print-procedure - (standard-unparser-method 'XML-ENTITY-DECLARATION + (standard-unparser-method 'XML-!ENTITY (lambda (element port) (write-char #\space port) - (write (xml-entity-declaration-name element) port))))) + (write (xml-!entity-name element) port))))) name value) -(define-structure (xml-parameter-entity-declaration +(define-structure (xml-unparsed-!entity (print-procedure - (standard-unparser-method 'XML-PARAMETER-ENTITY-DECLARATION + (standard-unparser-method 'XML-UNPARSED-!ENTITY (lambda (element port) (write-char #\space port) - (write (xml-parameter-entity-declaration-name element) - port))))) + (write (xml-unparsed-!entity-name element) port))))) + name + id + notation) + +(define-structure (xml-parameter-!entity + (print-procedure + (standard-unparser-method 'XML-PARAMETER-!ENTITY + (lambda (element port) + (write-char #\space port) + (write (xml-parameter-!entity-name element) port))))) name value) -(define-structure (xml-notation-declaration +(define-structure (xml-!notation (print-procedure - (standard-unparser-method 'XML-NOTATION-DECLARATION + (standard-unparser-method 'XML-!NOTATION (lambda (element port) (write-char #\space port) - (write (xml-notation-declaration-name element) - port))))) + (write (xml-!notation-name element) port))))) name - value) \ No newline at end of file + id) \ No newline at end of file diff --git a/v7/src/xml/xml.pkg b/v7/src/xml/xml.pkg new file mode 100644 index 000000000..17ec3cfbc --- /dev/null +++ b/v7/src/xml/xml.pkg @@ -0,0 +1,147 @@ +;;; -*-Scheme-*- +;;; +;;; $Id: xml.pkg,v 1.1 2001/07/06 20:50:45 cph Exp $ +;;; +;;; Copyright (c) 2001 Massachusetts Institute of Technology +;;; +;;; This program is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation; either version 2 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program; if not, write to the Free Software +;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +;;; 02111-1307, USA. + +;;;; XML: packaging + +(global-definitions "$brun/runtime") +(global-definitions "../parser/parser") + +(define-package (runtime xml) + (files) + (parent ())) + +(define-package (runtime xml structure) + (files "xml-struct") + (parent ()) + (export () + make-xml-!attlist + make-xml-!element + make-xml-!entity + make-xml-!ignore + make-xml-!include + make-xml-!notation + make-xml-comment + make-xml-declaration + make-xml-document + make-xml-dtd + make-xml-element + make-xml-entity-reference + make-xml-external-id + make-xml-parameter-!entity + make-xml-parameter-entity-reference + make-xml-processing-instructions + make-xml-unparsed-!entity + set-xml-!attlist-definitions! + set-xml-!attlist-name! + set-xml-!element-content-type! + set-xml-!element-name! + set-xml-!entity-name! + set-xml-!entity-value! + set-xml-!ignore-contents! + set-xml-!include-contents! + set-xml-!notation-id! + set-xml-!notation-name! + set-xml-comment-text! + set-xml-declaration-encoding! + set-xml-declaration-standalone?! + set-xml-declaration-version! + set-xml-document-declaration! + set-xml-document-dtd! + set-xml-document-misc-1! + set-xml-document-misc-2! + set-xml-document-misc-3! + set-xml-document-root! + set-xml-dtd-external! + set-xml-dtd-internal! + set-xml-dtd-root! + set-xml-element-attributes! + set-xml-element-contents! + set-xml-element-name! + set-xml-entity-reference-name! + set-xml-external-id-id! + set-xml-external-id-uri! + set-xml-parameter-!entity-name! + set-xml-parameter-!entity-value! + set-xml-processing-instructions-name! + set-xml-processing-instructions-text! + set-xml-unparsed-!entity-id! + set-xml-unparsed-!entity-name! + set-xml-unparsed-!entity-notation! + xml-!attlist-definitions + xml-!attlist-name + xml-!attlist? + xml-!element-content-type + xml-!element-name + xml-!element? + xml-!entity-name + xml-!entity-value + xml-!entity? + xml-!ignore-contents + xml-!ignore? + xml-!include-contents + xml-!include? + xml-!notation-id + xml-!notation-name + xml-!notation? + xml-comment-text + xml-comment? + xml-declaration-encoding + xml-declaration-standalone? + xml-declaration-version + xml-declaration? + xml-document-declaration + xml-document-dtd + xml-document-misc-1 + xml-document-misc-2 + xml-document-misc-3 + xml-document-root + xml-document? + xml-dtd-external + xml-dtd-internal + xml-dtd-root + xml-dtd? + xml-element-attributes + xml-element-contents + xml-element-name + xml-element? + xml-entity-reference-name + xml-entity-reference? + xml-external-id-id + xml-external-id-uri + xml-external-id? + xml-intern + xml-parameter-!entity-name + xml-parameter-!entity-value + xml-parameter-!entity? + xml-parameter-entity-reference? + xml-processing-instructions-name + xml-processing-instructions-text + xml-processing-instructions? + xml-unparsed-!entity-id + xml-unparsed-!entity-name + xml-unparsed-!entity-notation + xml-unparsed-!entity?)) + +(define-package (runtime xml parser) + (files "xml-parser") + (parent ()) + (export () + parse-xml-document)) \ No newline at end of file -- 2.25.1