Package the code. Move parser macros to their own file so that they
authorChris Hanson <org/chris-hanson/cph>
Fri, 6 Jul 2001 20:50:49 +0000 (20:50 +0000)
committerChris Hanson <org/chris-hanson/cph>
Fri, 6 Jul 2001 20:50:49 +0000 (20:50 +0000)
can be loaded during compilation.

Restructure the top-level parser to allow passing the DTD down to the
element parser for entity expansion.

Implement attribute-value normalization.

Add restrictions required for rejection of non-wf documents: no "--"
in comments; no duplicate attribute names.

Change names of DTD structures to reflect their keywords.

Fix various minor bugs, discovered either by the test suite or by the
compiler.

v7/src/xml/compile.scm [new file with mode: 0644]
v7/src/xml/ed-ffi.scm [new file with mode: 0644]
v7/src/xml/load.scm [new file with mode: 0644]
v7/src/xml/parser-macro.scm [new file with mode: 0644]
v7/src/xml/xml-parser.scm
v7/src/xml/xml-struct.scm
v7/src/xml/xml.pkg [new file with mode: 0644]

diff --git a/v7/src/xml/compile.scm b/v7/src/xml/compile.scm
new file mode 100644 (file)
index 0000000..92ede48
--- /dev/null
@@ -0,0 +1,33 @@
+;;; -*-Scheme-*-
+;;;
+;;; $Id: compile.scm,v 1.1 2001/07/06 20:50:37 cph Exp $
+;;;
+;;; Copyright (c) 2001 Massachusetts Institute of Technology
+;;;
+;;; This program is free software; you can redistribute it and/or
+;;; modify it under the terms of the GNU General Public License as
+;;; published by the Free Software Foundation; either version 2 of the
+;;; License, or (at your option) any later version.
+;;;
+;;; This program is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;; General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with this program; if not, write to the Free Software
+;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+;;; 02111-1307, USA.
+
+(load-option 'CREF)
+(with-working-directory-pathname (directory-pathname (current-load-pathname))
+  (lambda ()
+    (load "parser-macro")
+    (for-each compile-file
+             '("xml-struct"
+               "xml-parser"
+               ;;"xml-output"
+               ))
+    (cref/generate-constructors "xml")
+    (sf "xml.con")
+    (sf "xml.ldr")))
\ No newline at end of file
diff --git a/v7/src/xml/ed-ffi.scm b/v7/src/xml/ed-ffi.scm
new file mode 100644 (file)
index 0000000..0e0b4b8
--- /dev/null
@@ -0,0 +1,26 @@
+;;; -*-Scheme-*-
+;;;
+;;; $Id: ed-ffi.scm,v 1.1 2001/07/06 20:50:39 cph Exp $
+;;;
+;;; Copyright (c) 2001 Massachusetts Institute of Technology
+;;;
+;;; This program is free software; you can redistribute it and/or
+;;; modify it under the terms of the GNU General Public License as
+;;; published by the Free Software Foundation; either version 2 of the
+;;; License, or (at your option) any later version.
+;;;
+;;; This program is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;; General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with this program; if not, write to the Free Software
+;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+;;; 02111-1307, USA.
+
+;;;; XML: Edwin buffer packaging info
+
+(standard-scheme-find-file-initialization
+ '#(("xml-struct" (runtime xml structure) system-global-syntax-table)
+    ("xml-parser" (runtime xml parser) system-global-syntax-table)))
\ No newline at end of file
diff --git a/v7/src/xml/load.scm b/v7/src/xml/load.scm
new file mode 100644 (file)
index 0000000..b174440
--- /dev/null
@@ -0,0 +1,24 @@
+;;; -*-Scheme-*-
+;;;
+;;; $Id: load.scm,v 1.1 2001/07/06 20:50:41 cph Exp $
+;;;
+;;; Copyright (c) 2001 Massachusetts Institute of Technology
+;;;
+;;; This program is free software; you can redistribute it and/or
+;;; modify it under the terms of the GNU General Public License as
+;;; published by the Free Software Foundation; either version 2 of the
+;;; License, or (at your option) any later version.
+;;;
+;;; This program is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;; General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with this program; if not, write to the Free Software
+;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+;;; 02111-1307, USA.
+
+(with-working-directory-pathname (directory-pathname (current-load-pathname))
+  (lambda ()
+    (package/system-loader "xml" '() 'QUERY)))
\ No newline at end of file
diff --git a/v7/src/xml/parser-macro.scm b/v7/src/xml/parser-macro.scm
new file mode 100644 (file)
index 0000000..6fc9c9d
--- /dev/null
@@ -0,0 +1,58 @@
+;;; -*-Scheme-*-
+;;;
+;;; $Id: parser-macro.scm,v 1.1 2001/07/06 20:50:43 cph Exp $
+;;;
+;;; Copyright (c) 2001 Massachusetts Institute of Technology
+;;;
+;;; This program is free software; you can redistribute it and/or
+;;; modify it under the terms of the GNU General Public License as
+;;; published by the Free Software Foundation; either version 2 of the
+;;; License, or (at your option) any later version.
+;;;
+;;; This program is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;; General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with this program; if not, write to the Free Software
+;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+;;; 02111-1307, USA.
+
+;;;; XML parser macros
+
+(declare (usual-integrations))
+
+(define-*parser-macro S                        ;[3]
+  `(NOISE (+ (ALPHABET CHAR-SET:XML-WHITESPACE))))
+
+(define-*parser-macro S?
+  `(NOISE (* (ALPHABET CHAR-SET:XML-WHITESPACE))))
+
+(define-*parser-macro (bracket description open close . body)
+  (let ((v (generate-uninterned-symbol)))
+    `(WITH-POINTER ,v
+       (SEQ ,open
+           ,@body
+           (ALT ,close
+                (SEXP
+                 (LAMBDA (BUFFER)
+                   BUFFER
+                   (ERROR
+                    ,(if (string? description)
+                         (string-append "Unterminated " description " at")
+                         `(STRING-APPEND "Unterminated " ,description " at"))
+                    (PARSER-BUFFER-POSITION-STRING ,v)))))))))
+
+(define-*parser-macro (sbracket description open close . body)
+  `(BRACKET ,description (NOISE (STRING ,open)) (NOISE (STRING ,close))
+     ,@body))
+
+(define-*parser-macro (require-success message body)
+  `(ALT ,body
+       (SEXP
+        (LAMBDA (BUFFER)
+          (ERROR ,(if (string? message)
+                      (string-append message " at")
+                      `(STRING-APPEND ,message " at"))
+                 (PARSER-BUFFER-POSITION-STRING BUFFER))))))
\ No newline at end of file
index 2495096942e0d9b306b2d36b0d0388fe30f1c89b..4d193230a44872b40784056a295df65b8e82adf1 100644 (file)
@@ -1,6 +1,6 @@
 ;;; -*-Scheme-*-
 ;;;
-;;; $Id: xml-parser.scm,v 1.2 2001/07/05 20:47:41 cph Exp $
+;;; $Id: xml-parser.scm,v 1.3 2001/07/06 20:50:47 cph Exp $
 ;;;
 ;;; Copyright (c) 2001 Massachusetts Institute of Technology
 ;;;
@@ -25,9 +25,6 @@
 ;;; standard, second edition, 6 October 2000.  Each such comment marks
 ;;; the code that corresponds to that rule.
 
-;;; **** TO DO ****
-;;; * Attribute-value normalization (p. 29).
-
 (declare (usual-integrations))
 \f
 ;;;; Utilities
 (define char-set:char-data
   (char-set-difference char-set:xml-char (char-set #\< #\&)))
 
-(define-*parser-macro S                        ;[3]
-  `(NOISE (+ (ALPHABET CHAR-SET:XML-WHITESPACE))))
-
-(define-*parser-macro S?
-  `(NOISE (* (ALPHABET CHAR-SET:XML-WHITESPACE))))
-
 (define char-set:xml-whitespace
   (char-set #\space #\tab #\return #\linefeed))
 
-(define-*parser-macro (bracket description open close . body)
-  (let ((v (generate-uninterned-symbol)))
-    `(WITH-POINTER ,v
-       (SEQ ,open
-           ,@body
-           (ALT ,close
-                (SEXP
-                 (LAMBDA (BUFFER)
-                   BUFFER
-                   (ERROR
-                    ,(if (string? description)
-                         (string-append "Unterminated " description " at")
-                         `(STRING-APPEND "Unterminated " ,description " at"))
-                    (PARSER-BUFFER-POSITION-STRING ,v)))))))))
-
-(define-*parser-macro (sbracket description open close . body)
-  `(BRACKET ,description (NOISE (STRING ,open)) (NOISE (STRING ,close))
-     ,@body))
-
-(define-*parser-macro (require-success message body)
-  `(ALT ,body
-       (SEXP
-        (LAMBDA (BUFFER)
-          (ERROR ,(if (string? message)
-                      (string-append message " at")
-                      `(STRING-APPEND ,message " at"))
-                 (PARSER-BUFFER-POSITION-STRING BUFFER))))))
+(define (coalesce-strings! elements)
+  (do ((elements elements (cdr elements)))
+      ((not (pair? elements)))
+    (if (and (string? (car elements))
+            (pair? (cdr elements))
+            (string? (cadr elements)))
+       (begin
+         (set-car! elements
+                   (string-append (car elements)
+                                  (cadr elements)))
+         (set-cdr! elements (cddr elements))))))
 \f
 (define (make-xml-char-reference n)
-  (if (not (or (= n #x9)
-              (= n #xA)
-              (= n #xD)
-              (<= #x20 n #xD7FF)
-              (<= #xE000 n #xFFFD)
-              (<= #x10000 n #x10FFFF)))
+  (if (not (valid-xml-code-point? n))
       (error "Disallowed Unicode character code:" n))
   (integer->unicode-string n))
 
+(define (valid-xml-code-point? n)
+  (and (< n #x110000)
+       (if (< n #xD800)
+          (or (>= n #x20)
+              (= n #x9)
+              (= n #xA)
+              (= n #xD))
+          (and (>= n #xE000)
+               (not (or (= n #xFFFE)
+                        (= n #xFFFF)))))))
+
 (define (integer->unicode-string n)
-  (let ((initial-char
-        (lambda (n offset)
-          (integer->char
-           (fix:or (fix:and (fix:lsh #xFF (fix:+ n 1)) #xFF)
-                   (fix:lsh n -6)))))
-       (subsequent-char
-        (lambda (offset)
-          (integer->char
-           (fix:or #x80
-                   (fix:and (fix:lsh n (fix:- 0 offset))
-                            #x3F))))))
-    (declare (integrate-operator initial-char subsequent-char))
-    (if (not (and (<= 0 n) (< n #x80000000)))
-       (error:bad-range-argument n 'INTEGER->UNICODE-STRING))
-    (cond ((< n #x00000080)
-          (string (integer->char n)))
-         ((< n #x00000800)
-          (string (initial-char 5 6)
-                  (subsequent-char 6)))
-         ((< n #x00010000)
-          (string (initial-char 4 12)
-                  (subsequent-char 12)
-                  (subsequent-char 6)))
-         ((< n #x00200000)
-          (string (initial-char 3 18)
-                  (subsequent-char 18)
-                  (subsequent-char 12)
-                  (subsequent-char 6)))
-         ((< n #x04000000)
-          (string (initial-char 2 24)
-                  (subsequent-char 24)
-                  (subsequent-char 18)
-                  (subsequent-char 12)
-                  (subsequent-char 6)))
-         (else
-          (string (initial-char 1 30)
-                  (subsequent-char 30)
-                  (subsequent-char 24)
-                  (subsequent-char 18)
-                  (subsequent-char 12)
-                  (subsequent-char 6))))))
-\f
-(define (normalize-line-endings string)
-  (if (string-find-next-char string #\return)
-      (let ((end (string-length string)))
-       (let ((step-over-eol
-              (lambda (index)
-                (fix:+ index
-                       (if (and (fix:< (fix:+ index 1) end)
-                                (char=? (string-ref string (fix:+ index 1))
-                                        #\linefeed))
-                           2
-                           1)))))
-         (let ((n
-                (let loop ((start 0) (n 0))
-                  (let ((index
-                         (substring-find-next-char string start end
-                                                   #\return)))
-                    (if index
-                        (loop (step-over-eol index)
-                              (fix:+ n (fix:+ (fix:- index start) 1)))
-                        (fix:+ n (fix:- end start)))))))
-           (let ((string* (make-string n)))
-             (let loop ((start 0) (start* 0))
-               (let ((index
-                      (substring-find-next-char string start end
-                                                #\return)))
-                 (if index
-                     (let ((start*
-                            (substring-move! string start index
-                                             string* start*)))
-                       (string-set! string* start* #\newline)
-                       (loop (step-over-eol index)
-                             (fix:+ start* 1)))
-                     (substring-move! string start end string* start*))))
-             string*))))
-      string))
+
+  (define-integrable (initial-char n offset)
+    (integer->char
+     (fix:or (fix:and (fix:lsh #xFF (fix:+ n 1)) #xFF)
+            (fix:lsh n (fix:- 0 offset)))))
+
+  (define-integrable (subsequent-char offset)
+    (integer->char
+     (fix:or #x80
+            (fix:and (fix:lsh n (fix:- 0 offset)) #x3F))))
+
+  (if (not (and (<= 0 n) (< n #x80000000)))
+      (error:bad-range-argument n 'INTEGER->UNICODE-STRING))
+  (cond ((< n #x00000080)
+        (string (integer->char n)))
+       ((< n #x00000800)
+        (string (initial-char 5 6)
+                (subsequent-char 6)))
+       ((< n #x00010000)
+        (string (initial-char 4 12)
+                (subsequent-char 12)
+                (subsequent-char 6)))
+       ((< n #x00200000)
+        (string (initial-char 3 18)
+                (subsequent-char 18)
+                (subsequent-char 12)
+                (subsequent-char 6)))
+       ((< n #x04000000)
+        (string (initial-char 2 24)
+                (subsequent-char 24)
+                (subsequent-char 18)
+                (subsequent-char 12)
+                (subsequent-char 6)))
+       (else
+        (string (initial-char 1 30)
+                (subsequent-char 30)
+                (subsequent-char 24)
+                (subsequent-char 18)
+                (subsequent-char 12)
+                (subsequent-char 6)))))
 \f
 ;;;; Top level
 
-(define parse-xml-document             ;[1,22]
-  (*parser
-   (transform
-       (lambda (v)
-        (make-xml-document (vector-ref v 0)
-                           (vector-ref v 1)
-                           (vector-ref v 2)
-                           (vector-ref v 3)
-                           (vector-ref v 4)
-                           (vector-ref v 5)))
-     (complete
-      (seq (alt (top-level parse-declaration)
-               (values #f))
-          (encapsulate vector->list
-            (* (top-level parse-misc)))
-          (alt (seq (top-level parse-dtd)
-                    (encapsulate vector->list
-                      (* (top-level parse-misc))))
-               (values #f '()))
-          (top-level parse-element)
-          (encapsulate vector->list
-            (* (top-level parse-misc))))))))
+(define (parse-xml-document buffer)    ;[1,22]
+  (let* ((declaration (parse-declaration buffer))
+        (misc-1 (parse-misc buffer))
+        (dtd (parse-dtd buffer))
+        (misc-2 (if dtd (parse-misc buffer) '()))
+        (element
+         (fluid-let ((*dtd* dtd))
+           (parse-root-element buffer)))
+        (misc-3 (parse-misc buffer)))
+    (if (peek-parser-buffer-char buffer)
+       (error "Unparsed content in input at"
+              (parser-buffer-position-string buffer)))
+    (make-xml-document declaration
+                      misc-1
+                      dtd
+                      misc-2
+                      element
+                      misc-3)))
+
+(define *dtd*)
 
 (define parse-misc                     ;[27]
   (*parser
-   (alt parse-comment
-       parse-processing-instructions
-       (element-transform normalize-line-endings
-         (match (+ (alphabet char-set:xml-whitespace)))))))
+   (encapsulate vector->list
+     (* (top-level
+        (alt parse-comment
+             parse-processing-instructions
+             (element-transform normalize-line-endings
+               (match (+ (alphabet char-set:xml-whitespace))))))))))
 \f
 (define parse-declaration              ;[23,24,32,80]
   (*parser
-   (encapsulate (lambda (v) (transform-declaration (vector-ref v 0)))
-     (sbracket "XML declaration" "<?xml" "?>"
-       parse-attribute-list))))
+   (top-level
+    (transform (lambda (v) (transform-declaration (vector-ref v 0)))
+      (sbracket "XML declaration" "<?xml" "?>"
+       parse-attribute-list)))))
 
 (define (transform-declaration attributes)
   (let ((finish
 \f
 ;;;; Elements
 
+(define parse-root-element
+  (*parser (top-level parse-element)))
+
 (define (parse-element buffer)         ;[39]
   (let ((p (get-parser-buffer-pointer buffer)))
     (let ((v (parse-start-tag buffer)))
 
 (define parse-start-tag                        ;[40,44]
   (*parser
-   (bracket "start tag"
-       (seq (noise (string "<")) maybe-parse-name)
-       (match (alt (string ">") (string "/>")))
-     parse-attribute-list)))
+   (top-level
+    (bracket "start tag"
+       (seq (noise (string "<")) maybe-parse-name)
+       (match (alt (string ">") (string "/>")))
+      parse-attribute-list))))
 
 (define parse-end-tag                  ;[42]
   (*parser
-   (sbracket "end tag" "</" ">"
-     parse-name
-     S?)))
+   (top-level
+    (sbracket "end tag" "</" ">"
+      parse-name
+      S?))))
 
 (define parse-content                  ;[43]
   (*parser
     (*parser (sbracket description start end parser))))
 
 (define (terminated-region-parser description alphabet . ends)
+  description
   (let ((matcher
         (lambda (buffer)
           (let loop ()
   (terminated-region-parser "character data" char-set:char-data "]]>"))
 
 (define parse-comment                  ;[15]
-  (let ((parser (bracketed-region-parser "comment" "<!--" "-->")))
-    (*parser (element-transform make-xml-comment parser))))
+  (let ((parse-body
+        (terminated-region-parser "comment" char-set:xml-char "--")))
+    (*parser
+     (element-transform make-xml-comment
+       (sbracket "comment" "<!--" "-->"
+        parse-body)))))
 
 (define parse-cdata-section            ;[18,19,20,21]
   (bracketed-region-parser "CDATA section" "<![CDATA[" "]]>"))
 ;;;; Names and references
 
 (define parse-name
-  (*parser (require-success "malformed XML name" maybe-parse-name)))
+  (*parser (require-success "Malformed XML name" maybe-parse-name)))
 
 (define maybe-parse-name               ;[5]
   (*parser
 
 (define parse-name-token
   (*parser
-   (require-success "malformed XML name token"
+   (require-success "Malformed XML name token"
      maybe-parse-name-token)))
 
 (define maybe-parse-name-token         ;[7]
 
 (define parse-attribute-list
   (*parser
-   (encapsulate vector->list
-     (seq (* parse-attribute)
-         S?))))
+   (with-pointer p
+     (encapsulate
+        (lambda (v)
+          (let ((alist (vector->list v)))
+            (do ((alist alist (cdr alist)))
+                ((not (pair? alist)))
+              (let ((entry (assq (caar alist) (cdr alist))))
+                (if entry
+                    (error "Duplicate entry in attribute list at"
+                           (parser-buffer-position-string p)))))
+            alist))
+       (seq (* parse-attribute)
+           S?)))))
 
 (define parse-attribute                        ;[41,25]
   (*parser
      (seq S
          maybe-parse-name
          S?
-         (require-success "missing attribute separator"
+         (require-success "Missing attribute separator"
            (noise (string "=")))
          S?
          parse-attribute-value))))
 
 (define parse-attribute-value          ;[10]
   (let ((parser (attribute-value-parser char-set:char-data parse-reference)))
-    (*parser (require-success "malformed attribute value" parser))))
+    (*parser
+     (element-transform normalize-attribute-value
+       (require-success "Malformed attribute value"
+        parser)))))
+\f
+;;;; Normalization
 
-(define (coalesce-strings! elements)
-  (do ((elements elements (cdr elements)))
-      ((not (pair? elements)))
-    (if (and (string? (car elements))
-            (pair? (cdr elements))
-            (string? (cadr elements)))
-       (begin
-         (set-car! elements
-                   (string-append (car elements)
-                                  (cadr elements)))
-         (set-cdr! elements (cddr elements))))))
+(define (normalize-line-endings string #!optional always-copy?)
+  (if (string-find-next-char string #\return)
+      (let ((end (string-length string)))
+       (let ((step-over-eol
+              (lambda (index)
+                (fix:+ index
+                       (if (and (fix:< (fix:+ index 1) end)
+                                (char=? (string-ref string (fix:+ index 1))
+                                        #\linefeed))
+                           2
+                           1)))))
+         (let ((n
+                (let loop ((start 0) (n 0))
+                  (let ((index
+                         (substring-find-next-char string start end
+                                                   #\return)))
+                    (if index
+                        (loop (step-over-eol index)
+                              (fix:+ n (fix:+ (fix:- index start) 1)))
+                        (fix:+ n (fix:- end start)))))))
+           (let ((string* (make-string n)))
+             (let loop ((start 0) (start* 0))
+               (let ((index
+                      (substring-find-next-char string start end
+                                                #\return)))
+                 (if index
+                     (let ((start*
+                            (substring-move! string start index
+                                             string* start*)))
+                       (string-set! string* start* #\newline)
+                       (loop (step-over-eol index)
+                             (fix:+ start* 1)))
+                     (substring-move! string start end string* start*))))
+             string*))))
+      (if (and (not (default-object? always-copy?))
+              always-copy?)
+         (string-copy string)
+         string)))
+
+(define (normalize-attribute-value value)
+  (cond ((pair? value)
+        (map normalize-attribute-value value))
+       ((string? value)
+        (let ((string (normalize-line-endings value #t)))
+          (let ((n (string-length string)))
+            (do ((i 0 (fix:+ i 1)))
+                ((fix:= i n))
+              (if (or (char=? (string-ref string i) #\tab)
+                      (char=? (string-ref string i) #\newline))
+                  (string-set! string i #\space))))
+          string))
+       (else value)))
 \f
 ;;;; Document-type declarations
 
 (define parse-dtd                      ;[28]
   (*parser
-   (encapsulate
-       (lambda (v)
-        (make-xml-dtd (vector-ref v 0)
-                      (vector-ref v 1)
-                      (vector-ref v 2)))
-     (sbracket "document-type declaration" "<!DOCTYPE" ">"
-       (require-success "Malformed document type"
-        (seq S
-             parse-name
-             (alt (seq S
-                       parse-external-id)
-                  (values #f))
-             S?
-             (alt (seq (encapsulate vector->list
-                         (sbracket "internal DTD" "[" "]"
-                           (* (alt parse-markup-decl
-                                   parse-decl-separator))))
-                       S?)
-                  (values #f))))))))
-
-(define parse-markup-decl              ;[29]
-  (*parser
-   (alt parse-element-decl
-       parse-attlist-decl
-       parse-entity-decl
-       parse-notation-decl
-       parse-processing-instructions
-       parse-comment)))
+   (top-level
+    (encapsulate
+       (lambda (v)
+         (make-xml-dtd (vector-ref v 0)
+                       (vector-ref v 1)
+                       (vector-ref v 2)))
+      (sbracket "document-type declaration" "<!DOCTYPE" ">"
+       (require-success "Malformed document type"
+         (seq S
+              parse-name
+              (alt (seq S
+                        parse-external-id)
+                   (values #f))
+              S?
+              (alt (seq (encapsulate vector->list
+                          (sbracket "internal DTD" "[" "]"
+                            (* (alt parse-markup-decl
+                                    parse-decl-separator))))
+                        S?)
+                   (values #f)))))))))
 
 (define parse-decl-separator           ;[28a]
   (*parser
    (alt parse-parameter-entity-reference
        S)))
 
+(define parse-markup-decl              ;[29]
+  (*parser
+   (alt parse-!element
+       parse-!attlist
+       parse-!entity
+       parse-!notation
+       parse-processing-instructions
+       parse-comment)))
+
 (define parse-external-subset          ;[30]
   (*parser
    (seq (? parse-text-decl)
           parse-conditional-section
           parse-decl-separator))))
 \f
-(define parse-element-decl             ;[45]
+(define parse-!element                 ;[45]
   (letrec
       ((parse-children                 ;[47,49,50]
        (*parser
         (encapsulate encapsulate-suffix
-          (seq (sbracket "element-declaration type" "(" ")"
+          (seq (sbracket "!ELEMENT type" "(" ")"
                  S?
                  (alt (encapsulate (lambda (v) (cons 'ALT (vector->list v)))
                         (seq parse-cp
 
     (*parser
      (encapsulate
-        (lambda (v)
-          (make-xml-element-declaration (vector-ref v 0)
-                                        (vector-ref v 1)))
+        (lambda (v) (make-xml-!element (vector-ref v 0) (vector-ref v 1)))
        (sbracket "element declaration" "<!ELEMENT" ">"
         S
         parse-name
              (element-transform xml-intern (match (string "ANY")))
              ;;[51]
              (encapsulate (lambda (v) (cons 'MIX (vector->list v)))
-               (alt (sbracket "element-declaration type" "(" ")"
+               (with-pointer p
+                 (seq (noise (string "("))
                       S?
                       (noise (string "#PCDATA"))
-                      S?)
-                    (sbracket "element-declaration type" "(" ")*"
-                      S?
-                      (noise (string "#PCDATA"))
-                      (* (seq S?
-                              (noise (string "|"))
-                              S?
-                              parse-name))
-                      S?)))
+                      (alt (seq S?
+                                (noise (string ")")))
+                           (seq (* (seq S?
+                                        (noise (string "|"))
+                                        S?
+                                        parse-name))
+                                S?
+                                (noise (string ")*")))
+
+                           (sexp
+                            (lambda (buffer)
+                              buffer
+                              (error "Unterminated !ELEMENT type at"
+                                     (parser-buffer-position-string p))))))))
              parse-children))))))
 \f
-(define parse-attlist-decl             ;[52,53]
+(define parse-!attlist                 ;[52,53]
   (*parser
    (encapsulate
-       (lambda (v)
-        (make-xml-attribute-declaration (vector-ref v 0)
-                                        (vector-ref v 1)))
+       (lambda (v) (make-xml-!attlist (vector-ref v 0) (vector-ref v 1)))
      (sbracket "attribute-list declaration" "<!ATTLIST" ">"
        S
        parse-name
                          parse-attribute-value))))))
        S?))))
 \f
-(define parse-conditional-section      ;[61]
-  (*parser
-   (alt parse-include-section
-       parse-ignore-section)))
-
-(define-integrable conditional-start "<![")
-(define-integrable conditional-end "]]>")
-
-(define parse-include-section          ;[62]
-  (*parser
-   (encapsulate
-       (lambda (v)
-        (make-xml-include-section (vector->list v)))
-     (bracket "include section"
-        (seq (noise (string conditional-start))
-             S?
-             (noise (string "INCLUDE"))
-             S?
-             (noise (string "[")))
-        (noise (string conditional-end))
-       parse-external-subset-decl))))
-
-(define parse-ignore-section           ;[63]
-  (*parser
-   (encapsulate
-       (lambda (v)
-        (make-xml-ignore-section (vector->list v)))
-     (bracket "ignore section"
-        (seq (noise (string conditional-start))
-             S?
-             (noise (string "IGNORE"))
-             S?
-             (noise (string "[")))
-        (noise (string conditional-end))
-       (* parse-ignore-section-contents)))))
-
-(define parse-ignore-section-contents  ;[64,65]
-  (let ((parser
-        (terminated-region-parser "ignore section" char-set:xml-char
-                                  conditional-start conditional-end)))
-    (*parser
-     (seq parser
-         (* (seq (sbracket "ignore section" conditional-start conditional-end
-                   parse-ignore-section-contents)
-                 parser))))))
-\f
-(define parse-entity-decl              ;[70,71,72,73,74]
+(define parse-!entity                  ;[70,71,72,73,74,76]
   (*parser
    (sbracket "entity declaration" "<!ENTITY" ">"
      (seq S
          (alt (encapsulate
                   (lambda (v)
-                    (make-xml-parameter-entity-declaration (vector-ref v 0)
-                                                           (vector-ref v 1)))
+                    (make-xml-parameter-!entity (vector-ref v 0)
+                                                (vector-ref v 1)))
                 (seq (noise (string "%"))
                      S
                      parse-name
                           parse-external-id)))
               (encapsulate
                   (lambda (v)
-                    (make-xml-entity-declaration (vector-ref v 0)
-                                                 (vector-ref v 1)))
+                    (if (fix:= (vector-length v) 2)
+                        (make-xml-!entity (vector-ref v 0) (vector-ref v 1))
+                        (make-xml-unparsed-!entity (vector-ref v 0)
+                                                   (vector-ref v 1)
+                                                   (vector-ref v 2))))
                 (seq parse-name
                      S
                      (alt parse-entity-value
                           (seq parse-external-id
-                               (? parse-ndata-decl))))))
+                               (? (seq S
+                                       (noise (string "NDATA"))
+                                       S
+                                       parse-name)))))))
          S?))))
 
+(define parse-!notation                        ;[82,83]
+  (*parser
+   (encapsulate
+       (lambda (v) (make-xml-!notation (vector-ref v 0) (vector-ref v 1)))
+     (sbracket "notation declaration" "<!NOTATION" ">"
+       S
+       parse-name
+       S
+       (alt parse-external-id
+           (encapsulate
+               (lambda (v) (make-xml-external-id (vector-ref v 0) #f))
+             (seq (noise (string "PUBLIC"))
+                  S
+                  parse-public-id-literal)))
+       S?))))
+
 (define parse-external-id              ;[75]
   (*parser
    (alt (encapsulate
      (alt (sbracket description "\"" "\"" (match (* (alphabet a1))))
          (sbracket description "'" "'" (match (* (alphabet a2))))))))
 
+(define parse-system-literal           ;[11]
+  (string-parser "system literal" char-set:xml-char))
+
 (define parse-public-id-literal                ;[12,13]
   (string-parser
    "public-ID literal"
    (char-set-union char-set:alphanumeric
                   (string->char-set " \r\n-'()+,./:=?;!*#@$_%"))))
-
-(define parse-system-literal           ;[11]
-  (string-parser "system literal" char-set:xml-char))
 \f
-(define parse-ndata-decl               ;[76]
+(define parse-conditional-section      ;[61]
   (*parser
-   (seq S
-       (noise (string "NDATA"))
-       S
-       parse-name)))
+   (alt parse-!include
+       parse-!ignore)))
+
+(define-integrable conditional-start "<![")
+(define-integrable conditional-end "]]>")
 
-(define parse-notation-decl            ;[82,83]
+(define parse-!include                 ;[62]
   (*parser
-   (sbracket "notation declaration" "<!NOTATION" ">"
-     S
-     parse-name
-     S
-     (alt parse-external-id
-         (encapsulate
-             (lambda (v)
-               (make-xml-external-id (vector-ref v 0) #f))
-           (seq (noise (string "PUBLIC"))
-                S
-                parse-public-id-literal)))
-     S?)))
+   (encapsulate (lambda (v) (make-xml-!include (vector->list v)))
+     (bracket "include section"
+        (seq (noise (string conditional-start))
+             S?
+             (noise (string "INCLUDE"))
+             S?
+             (noise (string "[")))
+        (noise (string conditional-end))
+       parse-external-subset-decl))))
+
+(define parse-!ignore                  ;[63]
+  (*parser
+   (encapsulate (lambda (v) (make-xml-!ignore (vector->list v)))
+     (bracket "ignore section"
+        (seq (noise (string conditional-start))
+             S?
+             (noise (string "IGNORE"))
+             S?
+             (noise (string "[")))
+        (noise (string conditional-end))
+       (* parse-!ignore-contents)))))
+
+(define parse-!ignore-contents         ;[64,65]
+  (let ((parser
+        (terminated-region-parser "ignore section" char-set:xml-char
+                                  conditional-start conditional-end)))
+    (*parser
+     (seq parser
+         (* (seq (sbracket "ignore section" conditional-start conditional-end
+                   parse-!ignore-contents)
+                 parser))))))
 
 ;;; Edwin Variables:
 ;;; Eval: (scheme-indent-method 'encapsulate 1)
index 1f966a3a00c9dd83daa1114e27930250f5a35289..980a8fc3c3b1f33283523001bd898c360237abec 100644 (file)
@@ -1,6 +1,6 @@
 ;;; -*-Scheme-*-
 ;;;
-;;; $Id: xml-struct.scm,v 1.1 2001/07/05 20:47:53 cph Exp $
+;;; $Id: xml-struct.scm,v 1.2 2001/07/06 20:50:49 cph Exp $
 ;;;
 ;;; Copyright (c) 2001 Massachusetts Institute of Technology
 ;;;
   id
   uri)
 
-(define-structure (xml-element-declaration
+(define-structure (xml-!element
                   (print-procedure
-                   (standard-unparser-method 'XML-ELEMENT-DECLARATION
+                   (standard-unparser-method 'XML-!ELEMENT
                      (lambda (element port)
                        (write-char #\space port)
-                       (write (xml-element-declaration-name element) port)))))
+                       (write (xml-!element-name element) port)))))
   name
   content-type)
 
-(define-structure (xml-attribute-declaration
+(define-structure (xml-!attlist
                   (print-procedure
-                   (standard-unparser-method 'XML-ATTRIBUTE-DECLARATION
+                   (standard-unparser-method 'XML-!ATTLIST
                      (lambda (element port)
                        (write-char #\space port)
-                       (write (xml-attribute-declaration-name element)
-                              port)))))
+                       (write (xml-!attlist-name element) port)))))
   name
   definitions)
 
-(define-structure xml-include-section
+(define-structure xml-!include
   contents)
 
-(define-structure xml-ignore-section
+(define-structure xml-!ignore
   contents)
 
-(define-structure (xml-entity-declaration
+(define-structure (xml-!entity
                   (print-procedure
-                   (standard-unparser-method 'XML-ENTITY-DECLARATION
+                   (standard-unparser-method 'XML-!ENTITY
                      (lambda (element port)
                        (write-char #\space port)
-                       (write (xml-entity-declaration-name element) port)))))
+                       (write (xml-!entity-name element) port)))))
   name
   value)
 
-(define-structure (xml-parameter-entity-declaration
+(define-structure (xml-unparsed-!entity
                   (print-procedure
-                   (standard-unparser-method 'XML-PARAMETER-ENTITY-DECLARATION
+                   (standard-unparser-method 'XML-UNPARSED-!ENTITY
                      (lambda (element port)
                        (write-char #\space port)
-                       (write (xml-parameter-entity-declaration-name element)
-                              port)))))
+                       (write (xml-unparsed-!entity-name element) port)))))
+  name
+  id
+  notation)
+
+(define-structure (xml-parameter-!entity
+                  (print-procedure
+                   (standard-unparser-method 'XML-PARAMETER-!ENTITY
+                     (lambda (element port)
+                       (write-char #\space port)
+                       (write (xml-parameter-!entity-name element) port)))))
   name
   value)
 
-(define-structure (xml-notation-declaration
+(define-structure (xml-!notation
                   (print-procedure
-                   (standard-unparser-method 'XML-NOTATION-DECLARATION
+                   (standard-unparser-method 'XML-!NOTATION
                      (lambda (element port)
                        (write-char #\space port)
-                       (write (xml-notation-declaration-name element)
-                              port)))))
+                       (write (xml-!notation-name element) port)))))
   name
-  value)
\ No newline at end of file
+  id)
\ No newline at end of file
diff --git a/v7/src/xml/xml.pkg b/v7/src/xml/xml.pkg
new file mode 100644 (file)
index 0000000..17ec3cf
--- /dev/null
@@ -0,0 +1,147 @@
+;;; -*-Scheme-*-
+;;;
+;;; $Id: xml.pkg,v 1.1 2001/07/06 20:50:45 cph Exp $
+;;;
+;;; Copyright (c) 2001 Massachusetts Institute of Technology
+;;;
+;;; This program is free software; you can redistribute it and/or
+;;; modify it under the terms of the GNU General Public License as
+;;; published by the Free Software Foundation; either version 2 of the
+;;; License, or (at your option) any later version.
+;;;
+;;; This program is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;; General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with this program; if not, write to the Free Software
+;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+;;; 02111-1307, USA.
+
+;;;; XML: packaging
+
+(global-definitions "$brun/runtime")
+(global-definitions "../parser/parser")
+
+(define-package (runtime xml)
+  (files)
+  (parent ()))
+
+(define-package (runtime xml structure)
+  (files "xml-struct")
+  (parent ())
+  (export ()
+         make-xml-!attlist
+         make-xml-!element
+         make-xml-!entity
+         make-xml-!ignore
+         make-xml-!include
+         make-xml-!notation
+         make-xml-comment
+         make-xml-declaration
+         make-xml-document
+         make-xml-dtd
+         make-xml-element
+         make-xml-entity-reference
+         make-xml-external-id
+         make-xml-parameter-!entity
+         make-xml-parameter-entity-reference
+         make-xml-processing-instructions
+         make-xml-unparsed-!entity
+         set-xml-!attlist-definitions!
+         set-xml-!attlist-name!
+         set-xml-!element-content-type!
+         set-xml-!element-name!
+         set-xml-!entity-name!
+         set-xml-!entity-value!
+         set-xml-!ignore-contents!
+         set-xml-!include-contents!
+         set-xml-!notation-id!
+         set-xml-!notation-name!
+         set-xml-comment-text!
+         set-xml-declaration-encoding!
+         set-xml-declaration-standalone?!
+         set-xml-declaration-version!
+         set-xml-document-declaration!
+         set-xml-document-dtd!
+         set-xml-document-misc-1!
+         set-xml-document-misc-2!
+         set-xml-document-misc-3!
+         set-xml-document-root!
+         set-xml-dtd-external!
+         set-xml-dtd-internal!
+         set-xml-dtd-root!
+         set-xml-element-attributes!
+         set-xml-element-contents!
+         set-xml-element-name!
+         set-xml-entity-reference-name!
+         set-xml-external-id-id!
+         set-xml-external-id-uri!
+         set-xml-parameter-!entity-name!
+         set-xml-parameter-!entity-value!
+         set-xml-processing-instructions-name!
+         set-xml-processing-instructions-text!
+         set-xml-unparsed-!entity-id!
+         set-xml-unparsed-!entity-name!
+         set-xml-unparsed-!entity-notation!
+         xml-!attlist-definitions
+         xml-!attlist-name
+         xml-!attlist?
+         xml-!element-content-type
+         xml-!element-name
+         xml-!element?
+         xml-!entity-name
+         xml-!entity-value
+         xml-!entity?
+         xml-!ignore-contents
+         xml-!ignore?
+         xml-!include-contents
+         xml-!include?
+         xml-!notation-id
+         xml-!notation-name
+         xml-!notation?
+         xml-comment-text
+         xml-comment?
+         xml-declaration-encoding
+         xml-declaration-standalone?
+         xml-declaration-version
+         xml-declaration?
+         xml-document-declaration
+         xml-document-dtd
+         xml-document-misc-1
+         xml-document-misc-2
+         xml-document-misc-3
+         xml-document-root
+         xml-document?
+         xml-dtd-external
+         xml-dtd-internal
+         xml-dtd-root
+         xml-dtd?
+         xml-element-attributes
+         xml-element-contents
+         xml-element-name
+         xml-element?
+         xml-entity-reference-name
+         xml-entity-reference?
+         xml-external-id-id
+         xml-external-id-uri
+         xml-external-id?
+         xml-intern
+         xml-parameter-!entity-name
+         xml-parameter-!entity-value
+         xml-parameter-!entity?
+         xml-parameter-entity-reference?
+         xml-processing-instructions-name
+         xml-processing-instructions-text
+         xml-processing-instructions?
+         xml-unparsed-!entity-id
+         xml-unparsed-!entity-name
+         xml-unparsed-!entity-notation
+         xml-unparsed-!entity?))
+
+(define-package (runtime xml parser)
+  (files "xml-parser")
+  (parent ())
+  (export ()
+         parse-xml-document))
\ No newline at end of file