From fe2a530667a8c5b4b129abef219426738e0df5ed Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Fri, 26 Sep 2003 19:39:06 +0000 Subject: [PATCH] Another major upheaval, this time to give attributes an opaque representation. --- v7/src/xml/xml-output.scm | 54 +++--- v7/src/xml/xml-parser.scm | 346 +++++++++++++++++--------------------- v7/src/xml/xml-struct.scm | 293 ++++++++++++++++---------------- v7/src/xml/xml.pkg | 29 +++- 4 files changed, 351 insertions(+), 371 deletions(-) diff --git a/v7/src/xml/xml-output.scm b/v7/src/xml/xml-output.scm index 76a145800..b268890ea 100644 --- a/v7/src/xml/xml-output.scm +++ b/v7/src/xml/xml-output.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: xml-output.scm,v 1.27 2003/09/26 05:35:36 cph Exp $ +$Id: xml-output.scm,v 1.28 2003/09/26 19:39:01 cph Exp $ Copyright 2001,2002,2003 Massachusetts Institute of Technology @@ -333,52 +333,48 @@ USA. (write-xml-name (xml-parameter-entity-ref-name ref) ctx) (emit-string ";" ctx)) -(define (write-xml-attributes attributes suffix-cols ctx) +(define (write-xml-attributes attrs suffix-cols ctx) (let ((col (and (ctx-indent-attributes? ctx) (ctx-start-col ctx)))) (if (and col - (pair? attributes) - (pair? (cdr attributes)) + (pair? attrs) + (pair? (cdr attrs)) (>= (+ col - (xml-attributes-columns attributes) + (xml-attributes-columns attrs) suffix-cols) (ctx-x-size ctx))) (begin (emit-char #\space ctx) - (write-xml-attribute (car attributes) ctx) - (for-each (lambda (attribute) + (write-xml-attribute (car attrs) ctx) + (for-each (lambda (attr) (write-indent (+ col 1) ctx) - (write-xml-attribute attribute ctx)) - (cdr attributes))) - (for-each (lambda (attribute) + (write-xml-attribute attr ctx)) + (cdr attrs))) + (for-each (lambda (attr) (emit-char #\space ctx) - (write-xml-attribute attribute ctx)) - attributes)))) - -(define (xml-attributes-columns attributes) - (let loop ((attributes attributes) (n-cols 0)) - (if (pair? attributes) - (loop (cdr attributes) - (+ n-cols 1 (xml-attribute-columns (car attributes)))) - n-cols))) - -(define (write-xml-attribute attribute ctx) - (write-xml-name (car attribute) ctx) + (write-xml-attribute attr ctx)) + attrs)))) + +(define (xml-attributes-columns attrs) + (do ((attrs attrs (cdr attrs)) + (n-cols 0 (+ n-cols 1 (xml-attribute-columns (car attrs))))) + ((not (pair? attrs)) n-cols))) + +(define (write-xml-attribute attr ctx) + (write-xml-name (xml-attribute-name attr) ctx) (emit-char #\= ctx) - (write-xml-attribute-value (cdr attribute) ctx)) + (write-xml-attribute-value (xml-attribute-value attr) ctx)) (define (write-xml-attribute-value value ctx) (emit-char #\" ctx) - (for-each (lambda (item) - (write-xml-string item ctx)) - value) + (write-xml-string value ctx) (emit-char #\" ctx)) -(define (xml-attribute-columns attribute) - (+ (xml-name-columns (car attribute)) +(define (xml-attribute-columns attr) + (+ (xml-name-columns (xml-attribute-name attr)) 3 - (xml-string-columns (cadr attribute)))) + (xml-string-columns (xml-attribute-value attr)))) (define (write-xml-string string ctx) (write-escaped-string string diff --git a/v7/src/xml/xml-parser.scm b/v7/src/xml/xml-parser.scm index cdbe652ae..38e4f9526 100644 --- a/v7/src/xml/xml-parser.scm +++ b/v7/src/xml/xml-parser.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: xml-parser.scm,v 1.48 2003/09/26 05:35:40 cph Exp $ +$Id: xml-parser.scm,v 1.49 2003/09/26 19:39:03 cph Exp $ Copyright 2001,2002,2003 Massachusetts Institute of Technology @@ -233,65 +233,59 @@ USA. ;;;; Elements -(define (parse-element buffer) ;[39] - (let ((p (get-parser-buffer-pointer buffer))) +(define (parse-element b) ;[39] + (let ((p (get-parser-buffer-pointer b))) (fluid-let ((*prefix-bindings* *prefix-bindings*)) - (let ((v (parse-start-tag buffer))) + (let ((v (parse-start-tag b))) (and v - (vector - (make-xml-element - (vector-ref v 0) - (vector-ref v 1) - (if (string=? (vector-ref v 2) ">") - (let loop ((elements '#())) - (let ((v* (parse-end-tag buffer))) - (if v* - (begin - (if (not (eq? (vector-ref v 0) (vector-ref v* 0))) - (perror p "Mismatched start tag" - (vector-ref v 0) (vector-ref v* 0))) - (let ((contents - (coalesce-strings! - (delete-matching-items! - (vector->list elements) - (lambda (element) - (and (string? element) - (string-null? element))))))) - (if (null? contents) - ;; Preserve fact that this element - ;; was formed by a start/end tag pair - ;; rather than by an empty-element - ;; tag. - (list "") - contents))) - (let ((v* (parse-content buffer))) - (if (not v*) - (perror p "Unterminated start tag" - (vector-ref v 0))) - (if (equal? v* '#("")) - (perror p "Unknown content")) - (loop (vector-append elements v*)))))) - '())))))))) + (begin + (namespace-processing! v p) + (vector (let ((name (vector-ref v 0))) + (make-xml-element name + (vector-ref v 1) + (if (string=? (vector-ref v 2) ">") + (parse-element-content b p name) + '())))))))))) (define parse-start-tag ;[40,44] (*parser (top-level - (with-pointer p - (transform (lambda (v) - (let* ((name (vector-ref v 0)) - (attributes - (process-attr-decls name (vector-ref v 1) p))) - (process-namespace-decls attributes) - (vector (intern-element-name name) - (map (lambda (attr) - (cons (intern-attribute-name (car attr)) - (cdr attr))) - attributes) - (vector-ref v 2)))) - (bracket "start tag" - (seq "<" parse-uninterned-name) - (match (alt ">" "/>")) - parse-attribute-list)))))) + (bracket "start tag" + (seq "<" parse-unexpanded-name) + (match (alt ">" "/>")) + parse-attribute-list)))) + +(define (namespace-processing! v p) + (let* ((uname (vector-ref v 0)) + (attrs (process-attr-decls (car uname) (vector-ref v 1) p))) + (process-namespace-decls attrs) + (vector-set! v 0 (expand-element-name uname)) + (for-each (lambda (attr) + (set-xml-attribute-name! attr + (expand-attribute-name + (xml-attribute-name attr)))) + attrs))) + +(define (parse-element-content b p name) + (let loop ((elements '#())) + (let ((v (parse-end-tag b))) + (if v + (begin + (if (not (xml-name=? (vector-ref v 0) name)) + (perror p "Mismatched start tag" (vector-ref v 0) name)) + (let ((contents (coalesce-strings! (vector->list elements)))) + (if (null? contents) + ;; Preserve fact that this element was formed by a + ;; start/end tag pair rather than by an empty + ;; element tag. + (list "") + contents))) + (let ((v (parse-content b))) + (if (not v) + (perror p "Unterminated start tag" name)) + (if (equal? v '#("")) + (perror p "Unknown content")) + (loop (vector-append elements v))))))) (define parse-end-tag ;[42] (*parser @@ -312,68 +306,41 @@ USA. ;;;; Attribute defaulting -(define (process-attr-decls name attributes p) +(define (process-attr-decls qname attrs p) (let ((decl (and (or *standalone?* *internal-dtd?*) (find-matching-item *att-decls* - (let ((name (string->symbol (car name)))) - (lambda (decl) - (eq? name (xml-!attlist-name decl)))))))) + (lambda (decl) + (xml-name=? (xml-!attlist-name decl) qname)))))) (if decl - (let loop - ((definitions (xml-!attlist-definitions decl)) - (attributes attributes)) - (if (pair? definitions) - (loop (cdr definitions) - (process-attr-defn (car definitions) attributes p)) - attributes)) - attributes))) - -(define (process-attr-defn definition attributes p) - (let ((name (symbol-name (car definition))) - (type (cadr definition)) - (default (caddr definition))) - (let ((attribute - (find-matching-item attributes - (lambda (attribute) - (string=? name (caar attribute)))))) - (if attribute - (let ((av (cdr attribute))) + (do ((defns (xml-!attlist-definitions decl) (cdr defns)) + (attrs attrs (process-attr-defn (car defns) attrs p))) + ((not (pair? defns)) attrs)) + attrs))) + +(define (process-attr-defn defn attrs p) + (let ((qname (car defn)) + (type (cadr defn)) + (default (caddr defn))) + (let ((attr + (find-matching-item attrs + (lambda (attr) + (xml-name=? (car (xml-attribute-name attr)) qname))))) + (if attr + (let ((av (xml-attribute-value attr))) (if (and (pair? default) (eq? (car default) '|#FIXED|) - (not (attribute-value=? av (cdr default)))) - (perror (cdar attribute) - "Incorrect attribute value" - (string->symbol name))) + (not (string=? av (cdr default)))) + (perror (cdar attr) "Incorrect attribute value" qname)) (if (not (eq? type '|CDATA|)) - (set-car! av (trim-attribute-whitespace (car av)))) - attributes) + (set-xml-attribute-value! attr (trim-attribute-whitespace av))) + attrs) (begin (if (eq? default '|#REQUIRED|) - (perror p - "Missing required attribute value" - (string->symbol name))) + (perror p "Missing required attribute value" qname)) (if (pair? default) - (cons (cons (cons name p) (cdr default)) - attributes) - attributes)))))) - -(define (attribute-value=? v1 v2) - (and (boolean=? (pair? v1) (pair? v2)) - (if (pair? v1) - (and (let ((i1 (car v1)) - (i2 (car v2))) - (cond ((string? i1) - (and (string? i2) - (string=? i1 i2))) - ((xml-entity-ref? i1) - (and (xml-entity-ref? i2) - (eq? (xml-entity-ref-name i1) - (xml-entity-ref-name i2)))) - (else - (error "Unknown attribute value item:" i1)))) - (attribute-value=? (cdr v1) (cdr v2))) - #t))) + (cons (%make-xml-attribute (cons qname p) (cdr default)) attrs) + attrs)))))) ;;;; Other markup @@ -419,15 +386,15 @@ USA. (*parser (require-success "Malformed element name" parse-element-name))) (define parse-element-name - (*parser (map intern-element-name parse-uninterned-name))) + (*parser (map expand-element-name parse-unexpanded-name))) (define parse-attribute-name - (*parser (map intern-attribute-name parse-uninterned-name))) + (*parser (map expand-attribute-name parse-unexpanded-name))) -(define parse-uninterned-name ;[5] +(define parse-unexpanded-name ;[5] (*parser (with-pointer p - (map (lambda (s) (cons s p)) + (map (lambda (s) (cons (make-xml-qname s) p)) (match (seq (? (seq match-name ":")) match-name)))))) @@ -458,53 +425,49 @@ USA. (loop) #t)))) -(define (process-namespace-decls attributes) +;;;; Namespaces + +(define (process-namespace-decls attrs) (set! *prefix-bindings* - (let loop ((attributes attributes)) - (if (pair? attributes) - (let ((name (xml-attribute-name (car attributes))) - (tail (loop (cdr attributes)))) - (let ((s (car name)) - (pn (cdr name))) - (let ((iri - (lambda () - (string->symbol - (xml-attribute-value (car attributes))))) + (let loop ((attrs attrs)) + (if (pair? attrs) + (let ((uname (xml-attribute-name (car attrs))) + (value (xml-attribute-value (car attrs))) + (tail (loop (cdr attrs)))) + (let ((qname (car uname)) + (p (cdr uname))) + (let ((get-iri (lambda () (make-xml-namespace-iri value))) (forbidden-iri (lambda (iri) - (perror pn "Forbidden namespace IRI" iri)))) + (perror p "Forbidden namespace IRI" iri)))) (let ((guarantee-legal-iri (lambda (iri) (if (or (eq? iri xml-iri) (eq? iri xmlns-iri)) (forbidden-iri iri))))) - (cond ((string=? "xmlns" s) - (let ((iri (iri))) + (cond ((xml-name=? qname 'xmlns) + (let ((iri (get-iri))) (guarantee-legal-iri iri) (cons (cons (null-xml-name-prefix) iri) tail))) - ((string-prefix? "xmlns:" s) - (if (string=? "xmlns:xmlns" s) - (perror pn "Illegal namespace prefix" s)) - (let ((iri (iri))) - (if (null-xml-namespace-iri? iri) - ;; legal in XML 1.1 - (forbidden-iri "")) - (if (string=? "xmlns:xml" s) + ((xml-name-prefix=? qname 'xmlns) + (if (xml-name=? qname 'xmlns:xmlns) + (perror p "Illegal namespace prefix" qname)) + (let ((iri (get-iri))) + (if (xml-name=? qname 'xmlns:xml) (if (not (eq? iri xml-iri)) (forbidden-iri iri)) (guarantee-legal-iri iri)) - (cons (cons (string-tail->symbol s 6) iri) - tail))) + (cons (cons (xml-name-local qname) iri) tail))) (else tail)))))) *prefix-bindings*))) unspecific) -(define (intern-element-name n) (intern-name n #f)) -(define (intern-attribute-name n) (intern-name n #t)) +(define (expand-element-name uname) (expand-name uname #f)) +(define (expand-attribute-name uname) (expand-name uname #t)) -(define (intern-name n attribute-name?) - (let ((qname (string->symbol (car n))) - (p (cdr n))) +(define (expand-name uname attribute-name?) + (let ((qname (car uname)) + (p (cdr uname))) (if *in-dtd?* qname (let ((iri (lookup-namespace-prefix qname p attribute-name?))) @@ -654,25 +617,29 @@ USA. ;;;; Attributes -(define (attribute-list-parser parse-name) +(define (attribute-list-parser parse-name name=?) (let ((parse-attribute (attribute-parser parse-name))) (*parser (with-pointer p (encapsulate (lambda (v) - (let ((alist (vector->list v))) - (do ((alist alist (cdr alist))) - ((not (pair? alist))) - (let ((entry (assq (caar alist) (cdr alist)))) - (if entry - (perror p "Duplicate entry in attribute list")))) - alist)) + (let ((attrs (vector->list v))) + (do ((attrs attrs (cdr attrs))) + ((not (pair? attrs))) + (let ((name (xml-attribute-name (car attrs)))) + (if (there-exists? (cdr attrs) + (lambda (attr) + (name=? (xml-attribute-name attr) name))) + (perror p "Attributes with same name" name)))) + attrs)) (seq (* parse-attribute) S?)))))) (define (attribute-parser parse-name) ;[41,25] (*parser - (encapsulate (lambda (v) (cons (vector-ref v 0) (vector-ref v 1))) + (encapsulate (lambda (v) + (%make-xml-attribute (vector-ref v 0) + (vector-ref v 1))) (seq S parse-name S? @@ -680,12 +647,14 @@ USA. S? parse-attribute-value)))) -(define parse-declaration-attributes - (attribute-list-parser (*parser (map make-xml-qname (match match-name))))) - (define parse-attribute-list - (attribute-list-parser parse-uninterned-name)) + (attribute-list-parser parse-unexpanded-name + (lambda (a b) (xml-name=? (car a) (car b))))) +(define parse-declaration-attributes + (attribute-list-parser (*parser (map make-xml-qname (match match-name))) + xml-name=?)) + (define (attribute-value-parser alphabet parse-reference) (let ((a1 (alphabet- alphabet (string->alphabet "\""))) (a2 (alphabet- alphabet (string->alphabet "'")))) @@ -731,46 +700,43 @@ USA. (null? (cdr elements)))) (error "Uncoalesced attribute value:" elements)) (normalize-attribute-value (car elements))) - (require-success "Malformed attribute value" - parser))))) + (require-success "Malformed attribute value" parser))))) ;;;; Normalization (define (normalize-attribute-value string) - (coalesce-strings! - (reverse! - (let loop ((string string) (result '())) - (let ((buffer (string->parser-buffer (normalize-line-endings string)))) - (let normalize-string ((port (open-output-string)) (result result)) - (let* ((p (get-parser-buffer-pointer buffer)) - (char (read-parser-buffer-char buffer))) - (case char - ((#f) - (cons (get-output-string port) result)) - ((#\tab #\newline #\return) - (write-char #\space port) - (normalize-string port result)) - ((#\&) - (set-parser-buffer-pointer! buffer p) - (let ((v (parse-char-reference buffer))) - (if v - (begin - (write-string (vector-ref v 0) port) - (normalize-string port result)) - (normalize-string - (open-output-string) - (let ((name - (vector-ref (parse-entity-reference-name buffer) - 0))) - (let ((value (dereference-entity name #t p))) - (expand-entity-value name p - (lambda () - (loop value - (cons (get-output-string port) - result)))))))))) - (else - (write-char char port) - (normalize-string port result)))))))))) + (call-with-output-string + (lambda (port) + (let normalize-string ((string string)) + (let ((b (string->parser-buffer (normalize-line-endings string)))) + (let loop () + (let* ((p (get-parser-buffer-pointer b)) + (char (read-parser-buffer-char b))) + (case char + ((#f) + unspecific) + ((#\tab #\newline #\return) + (write-char #\space port) + (loop)) + ((#\&) + (set-parser-buffer-pointer! b p) + (let ((v (parse-char-reference b))) + (if v + (begin + (write-string (vector-ref v 0) port) + (loop)) + (begin + (let ((name + (vector-ref (parse-entity-reference-name b) + 0))) + (let ((value (dereference-entity name #t p))) + (expand-entity-value name p + (lambda () + (normalize-string value))))) + (loop))))) + (else + (write-char char port) + (loop)))))))))) (define (trim-attribute-whitespace string) (call-with-output-string @@ -1077,8 +1043,8 @@ USA. (list name type (if (and (not (eq? type '|CDATA|)) (pair? default)) - (list (car default) - (trim-attribute-whitespace (cadr default))) + (cons (car default) + (trim-attribute-whitespace (cdr default))) default)))) (seq S parse-attribute-name diff --git a/v7/src/xml/xml-struct.scm b/v7/src/xml/xml-struct.scm index 3d1d1d864..58d8e72ca 100644 --- a/v7/src/xml/xml-struct.scm +++ b/v7/src/xml/xml-struct.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: xml-struct.scm,v 1.36 2003/09/26 05:35:43 cph Exp $ +$Id: xml-struct.scm,v 1.37 2003/09/26 19:39:06 cph Exp $ Copyright 2001,2002,2003 Massachusetts Institute of Technology @@ -30,11 +30,17 @@ USA. (define-syntax define-xml-type (sc-macro-transformer (lambda (form environment) - (if (syntax-match? '(IDENTIFIER * (IDENTIFIER EXPRESSION ? EXPRESSION)) - (cdr form)) + (if (and (pair? (cdr form)) + (identifier? (cadr form)) + (list-of-type? (cddr form) + (lambda (slot) + (or (syntax-match? '(IDENTIFIER EXPRESSION) slot) + (syntax-match? '(IDENTIFIER 'CANONICALIZE EXPRESSION) + slot))))) (let ((root (symbol-append 'XML- (cadr form))) (slots (cddr form))) (let ((rtd (symbol-append '< root '>)) + (%constructor (symbol-append '%MAKE- root)) (constructor (symbol-append 'MAKE- root)) (predicate (symbol-append root '?)) (error:not (symbol-append 'ERROR:NOT- root)) @@ -42,11 +48,18 @@ USA. (map (lambda (slot) (close-syntax (car slot) environment)) slots))) - (let ((test - (lambda (slot var name) - `(IF (NOT (,(close-syntax (cadr slot) environment) ,var)) - (ERROR:WRONG-TYPE-ARGUMENT - ,var ,(symbol->string (car slot)) ',name))))) + (let ((canonicalize + (lambda (slot var caller) + (if (eq? (cadr slot) 'CANONICALIZE) + `(,(close-syntax (caddr slot) environment) ,var) + `(BEGIN + (IF (NOT (,(close-syntax (cadr slot) environment) + ,var)) + (ERROR:WRONG-TYPE-ARGUMENT + ,var + ,(symbol->string (car slot)) + ',caller)) + ,var))))) `(BEGIN (DEFINE ,rtd (MAKE-RECORD-TYPE ',root '(,@(map car slots)))) @@ -63,19 +76,14 @@ USA. #\- #\space)) CALLER)) - (DEFINE ,constructor - (LET ((CONSTRUCTOR - (RECORD-CONSTRUCTOR ,rtd '(,@(map car slots))))) - (NAMED-LAMBDA (,constructor ,@slot-vars) - ,@(map (lambda (slot var) (test slot var constructor)) - slots slot-vars) - (CONSTRUCTOR - ,@(map (lambda (slot var) - (if (pair? (cddr slot)) - `(,(caddr slot) ,var) - var)) - slots - slot-vars))))) + (DEFINE ,%constructor + (RECORD-CONSTRUCTOR ,rtd '(,@(map car slots)))) + (DEFINE (,constructor ,@slot-vars) + (,%constructor + ,@(map (lambda (slot var) + (canonicalize slot var constructor)) + slots + slot-vars))) ,@(map (lambda (slot var) (let* ((accessor (symbol-append root '- (car slot))) (modifier (symbol-append 'SET- accessor '!))) @@ -86,8 +94,10 @@ USA. (LET ((MODIFIER (RECORD-MODIFIER ,rtd ',(car slot)))) (NAMED-LAMBDA (,modifier OBJECT ,var) - ,(test slot var modifier) - (MODIFIER OBJECT ,var))))))) + (MODIFIER OBJECT + ,(canonicalize slot + var + modifier)))))))) slots slot-vars))))) (ill-formed-syntax form))))) @@ -136,34 +146,44 @@ USA. (char-set-union char-set:alphanumeric (string->char-set "_.-"))) +(define-xml-type attribute + (name xml-name?) + (value canonicalize canonicalize-char-data)) + +(define (xml-char-data? object) + (or (wide-char? object) + (wide-string? object) + (and (string? object) + (utf8-string-valid? object)))) + +(define (canonicalize-char-data object) + (cond ((wide-char? object) + (call-with-output-string + (lambda (port) + (write-utf8-char object port)))) + ((wide-string? object) + (wide-string->utf8-string object)) + ((and (string? object) + (utf8-string-valid? object)) + object) + (else (error:wrong-type-datum object "an XML char data")))) + (define-xml-type element (name xml-name?) - (attributes xml-attribute-list? canonicalize-attributes) + (attributes xml-attribute-list?) (contents xml-content?)) (define (xml-attribute-list? object) (and (list-of-type? object xml-attribute?) - (let loop ((attributes object)) - (if (pair? attributes) - (and (not (there-exists? (cdr attributes) - (let ((name (caar attributes))) - (lambda (attribute) - (xml-name=? (car attribute) name))))) - (loop (cdr attributes))) + (let loop ((attrs object)) + (if (pair? attrs) + (and (not (there-exists? (cdr attrs) + (let ((name (xml-attribute-name (car attrs)))) + (lambda (attr) + (xml-name=? (xml-attribute-name attr) name))))) + (loop (cdr attrs))) #t)))) -(define (xml-attribute? object) - (and (pair? object) - (xml-name? (car object)) - (xml-attribute-value? (cdr object)))) - -(define (xml-attribute-value? object) - (and (pair? object) - (list-of-type? object xml-attribute-value-item?))) - -(define (xml-attribute-value-item? object) - (xml-char-data? object)) - (define (xml-content? object) (list-of-type? object xml-content-item?)) @@ -173,98 +193,74 @@ USA. (xml-element? object) (xml-processing-instructions? object))) -(define-xml-type comment - (text xml-char-data? canonicalize-char-data)) - -(define-xml-type processing-instructions - (name - (lambda (object) - (and (xml-qname? object) - (not (string-ci=? "xml" (symbol-name object)))))) - (text xml-char-data? canonicalize-char-data)) - -(define (xml-char-data? object) - (or (string? object) - (wide-char? object) - (wide-string? object))) - -(define (canonicalize-attributes attributes) - (map (lambda (a) - (cons (car a) - (canonicalize-attribute-value (cdr a)))) - attributes)) - -(define (xml-attribute-name attr) - (car attr)) - -(define (xml-attribute-value attr) - (cadr attr)) - -(define (canonicalize-attribute-value v) - (canonicalize-content v)) - -(define (canonicalize-entity-value v) - (if (xml-external-id? v) - v - (canonicalize-attribute-value v))) - (define (canonicalize-content content) (letrec ((search (lambda (items) (if (pair? items) - (let ((item (canonicalize-char-data (car items))) + (let ((item (car items)) (items (cdr items))) - (if (string? item) - (join item items) - (cons item (search items)))) + (if (xml-char-data? item) + (join (canonicalize-char-data item) items) + (begin + (check-item item) + (cons item (search items))))) '()))) (join (lambda (s items) (if (pair? items) - (let ((item (canonicalize-char-data (car items))) + (let ((item (car items)) (items (cdr items))) - (if (string? item) - (join (string-append s item) items) - (cons* s item (search items)))) - (list s))))) + (if (xml-char-data? item) + (join (string-append s (canonicalize-char-data item)) + items) + (begin + (check-item item) + (cons* s item (search items))))) + (list s)))) + (check-item + (lambda (item) + (if (not (or (xml-comment? item) + (xml-element? item) + (xml-processing-instructions? item))) + (error:wrong-type-datum content "an XML content"))))) (search content))) - -(define (canonicalize-char-data object) - (cond ((wide-char? object) - (call-with-output-string - (lambda (port) - (write-utf8-char object port)))) - ((wide-string? object) (wide-string->utf8-string object)) - (else object))) +(define-xml-type comment + (text canonicalize canonicalize-char-data)) + +(define-xml-type processing-instructions + (name + (lambda (object) + (and (xml-qname? object) + (not (xml-name=? object 'xml))))) + (text canonicalize canonicalize-char-data)) + (define-xml-type dtd (root xml-name?) - (external - (lambda (object) - (or (not object) - (xml-external-id? object)))) - (internal - (lambda (object) - (list-of-type? object - (lambda (object) - (or (xml-comment? object) - (xml-!element? object) - (xml-!attlist? object) - (xml-!entity? object) - (xml-unparsed-!entity? object) - (xml-parameter-!entity? object) - (xml-!notation? object) - (xml-parameter-entity-ref? object))))))) + (external (lambda (object) + (or (not object) + (xml-external-id? object)))) + (internal (lambda (object) + (list-of-type? object + (lambda (object) + (or (xml-comment? object) + (xml-!element? object) + (xml-!attlist? object) + (xml-!entity? object) + (xml-unparsed-!entity? object) + (xml-parameter-!entity? object) + (xml-!notation? object) + (xml-parameter-entity-ref? object))))))) (define-xml-type external-id (id (lambda (object) (or (not object) (public-id? object)))) - (iri (lambda (object) - (or (not object) - (xml-char-data? object))) - canonicalize-char-data)) + (iri canonicalize + (lambda (object) + (and object + (canonicalize-char-data object))))) (define (public-id? object) (string-composed-of? object char-set:xml-public-id)) @@ -309,24 +305,24 @@ USA. (define-xml-type !attlist (name xml-qname?) - (definitions - (lambda (object) - (list-of-type? object - (lambda (item) - (and (pair? item) - (xml-qname? (car item)) - (pair? (cdr item)) - (!attlist-type? (cadr item)) - (pair? (cddr item)) - (!attlist-default? (caddr item)) - (null? (cdddr item)))))) + (definitions canonicalize (lambda (object) + (if (not (list-of-type? object + (lambda (item) + (and (pair? item) + (xml-qname? (car item)) + (pair? (cdr item)) + (!attlist-type? (cadr item)) + (pair? (cddr item)) + (!attlist-default? (caddr item)) + (null? (cdddr item)))))) + (error:wrong-type-datum object "an XML !ATTLIST definition")) (map (lambda (item) (let ((d (caddr item))) (if (pair? d) (list (car item) (cadr item) - (cons (car d) (canonicalize-attribute-value (cdr d)))) + (cons (car d) (canonicalize-char-data (cdr d)))) item))) object)))) @@ -340,25 +336,22 @@ USA. (eq? object '|NMTOKENS|) (eq? object '|NMTOKEN|) (and (pair? object) - (eq? '|NOTATION| (car object)) - (list-of-type? (cdr object) xml-qname?)) - (and (pair? object) - (eq? 'enumerated (car object)) - (list-of-type? (cdr object) xml-nmtoken?)))) + (or (and (eq? (car object) '|NOTATION|) + (list-of-type? (cdr object) xml-qname?)) + (and (eq? (car object) 'enumerated) + (list-of-type? (cdr object) xml-nmtoken?)))))) (define (!attlist-default? object) (or (eq? object '|#REQUIRED|) (eq? object '|#IMPLIED|) (and (pair? object) - (eq? '|#FIXED| (car object)) - (xml-attribute-value? (cdr object))) - (and (pair? object) - (eq? 'default (car object)) - (xml-attribute-value? (cdr object))))) + (or (eq? (car object) '|#FIXED|) + (eq? (car object) 'default)) + (xml-char-data? (cdr object))))) (define-xml-type !entity (name xml-qname?) - (value entity-value? canonicalize-entity-value)) + (value canonicalize canonicalize-entity-value)) (define-xml-type unparsed-!entity (name xml-qname?) @@ -367,16 +360,20 @@ USA. (define-xml-type parameter-!entity (name xml-qname?) - (value entity-value? canonicalize-entity-value)) - -(define (entity-value? object) - (or (and (pair? object) - (list-of-type? object - (lambda (object) - (or (xml-char-data? object) - (xml-entity-ref? object) - (xml-parameter-entity-ref? object))))) - (xml-external-id? object))) + (value canonicalize canonicalize-entity-value)) + +(define (canonicalize-entity-value object) + (if (xml-external-id? object) + object + (begin + (if (not (and (pair? object) + (list-of-type? object + (lambda (object) + (or (xml-char-data? object) + (xml-entity-ref? object) + (xml-parameter-entity-ref? object)))))) + (error:wrong-type-datum object "an XML !ENTITY value")) + (canonicalize-content object)))) (define-xml-type !notation (name xml-qname?) diff --git a/v7/src/xml/xml.pkg b/v7/src/xml/xml.pkg index b2564a2bc..d7d43ad35 100644 --- a/v7/src/xml/xml.pkg +++ b/v7/src/xml/xml.pkg @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: xml.pkg,v 1.34 2003/09/26 05:35:33 cph Exp $ +$Id: xml.pkg,v 1.35 2003/09/26 19:38:58 cph Exp $ Copyright 2001,2002,2003 Massachusetts Institute of Technology @@ -89,6 +89,7 @@ USA. + @@ -104,6 +105,7 @@ USA. error:not-xml-!element error:not-xml-!entity error:not-xml-!notation + error:not-xml-attribute error:not-xml-comment error:not-xml-declaration error:not-xml-document @@ -119,6 +121,7 @@ USA. guarantee-xml-!element guarantee-xml-!entity guarantee-xml-!notation + guarantee-xml-attribute guarantee-xml-comment guarantee-xml-declaration guarantee-xml-document @@ -134,6 +137,7 @@ USA. make-xml-!element make-xml-!entity make-xml-!notation + make-xml-attribute make-xml-comment make-xml-declaration make-xml-document @@ -153,6 +157,8 @@ USA. set-xml-!entity-value! set-xml-!notation-id! set-xml-!notation-name! + set-xml-attribute-name! + set-xml-attribute-value! set-xml-comment-text! set-xml-declaration-encoding! set-xml-declaration-standalone! @@ -196,8 +202,6 @@ USA. xml-attribute-name xml-attribute-namespace-decl? xml-attribute-value - xml-attribute-value-item? - xml-attribute-value? xml-attribute? xml-char-data? xml-comment-text @@ -241,7 +245,24 @@ USA. xml-unparsed-!entity-name xml-unparsed-!entity-notation xml-unparsed-!entity? - xml-whitespace-string?)) + xml-whitespace-string?) + (export (runtime xml) + %make-xml-!attlist + %make-xml-!element + %make-xml-!entity + %make-xml-!notation + %make-xml-attribute + %make-xml-comment + %make-xml-declaration + %make-xml-document + %make-xml-dtd + %make-xml-element + %make-xml-entity-ref + %make-xml-external-id + %make-xml-parameter-!entity + %make-xml-parameter-entity-ref + %make-xml-processing-instructions + %make-xml-unparsed-!entity)) (define-package (runtime xml parser) (files "xml-chars" "xml-parser") -- 2.25.1