From: Chris Hanson Date: Tue, 30 Sep 2003 04:33:46 +0000 (+0000) Subject: Second draft: this one uses a fully lazy copy of the XML structure so X-Git-Tag: 20090517-FFI~1778 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=d7bc14dccbd77e785d65e31e7765a4fc8313d58d;p=mit-scheme.git Second draft: this one uses a fully lazy copy of the XML structure so that the algorithms are concise _and_ efficient. This design also allows EQ? to be used when comparing nodes. --- diff --git a/v7/src/xml/xpath.scm b/v7/src/xml/xpath.scm index aca498aba..dc797768e 100644 --- a/v7/src/xml/xpath.scm +++ b/v7/src/xml/xpath.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: xpath.scm,v 1.1 2003/09/28 04:12:54 cph Exp $ +$Id: xpath.scm,v 1.2 2003/09/30 04:33:46 cph Exp $ Copyright 2003 Massachusetts Institute of Technology @@ -28,12 +28,13 @@ USA. (declare (usual-integrations)) (define-generic nodify-item (item parent index)) -(define-generic node-eq? (n1 n2)) (define-generic node-item (node)) (define-generic parent-node (node)) -(define-generic node-child-items (node)) -(define-generic walk-children+ (node index p a)) -(define-generic walk-children- (node index p a)) +(define-generic node-children (node)) +(define-generic next-node (node)) +(define-generic prev-node (node)) +(define-generic node-attrs (node)) +(define-generic node-ns-decls (node)) (define-generic parent-index (node)) (define-generic node-name (node)) (define-generic node-string (node)) @@ -41,117 +42,159 @@ USA. (define-class () (item accessor node-item)) -(define-method parent-node ((node )) - node - #f) +(define-method parent-node ((node )) node #f) +(define-method node-children ((node )) node '()) +(define-method next-node ((node )) node #f) +(define-method prev-node ((node )) node #f) +(define-method node-attrs ((node )) node '()) +(define-method node-ns-decls ((node )) node '()) +(define-method node-name ((node )) node #f) -(define-method node-child-items ((node )) - node - '()) - -(define-generic walk-children+ ((node ) index p a) - node index p a - unspecific) - -(define-generic walk-children- ((node ) index p a) - node index p a - unspecific) - -(define-method node-name ((node )) - node - #f) +(define-class ()) (define-class () (parent accessor parent-node)) -(define (parented-node-nodifier item-class node-class) - (let ((constructor (instance-constructor node-class '(item parent)))) - (define-method nodify-item ((item item-class) parent index) - index - (constructor item parent)))) - -(define-class ()) - -(define-method walk-children+ ((node ) index p a) - (let loop ((items (node-child-items node)) (i 0) (a a)) - (if (pair? items) - (loop (cdr items) - (fix:+ i 1) - (if (fix:> i index) - (p (nodify-item (car items) node i) a) - a)) - a))) - -(define-method walk-children- ((node ) index p a) - (let loop ((items (node-child-items node)) (i 0)) - (if (fix:< i index) - (p (nodify-item (car items) node i) - (loop (cdr items) (fix:+ i 1))) - a))) - (define-class () (index accessor parent-index)) (define (content-node-nodifier item-class node-class) - (let ((constructor (instance-constructor node-class '(item parent index)))) + (let ((make-node (instance-constructor node-class '(item parent index)))) (define-method nodify-item ((item item-class) parent index) - (constructor item parent index)))) - -(define-method node-eq? ((n1 ) (n2 )) - (and (eq? (instance-class n1) (instance-class n2)) - (eq? (node-item n1) (node-item n2)))) + (make-node item parent index)))) + +(define-method next-node ((node )) + (let ((children (node-children node))) + (if (pair? children) + (car children) + (let ((parent (parent-node node))) + (let ((siblings + (stream-tail (node-children parent) + (fix:+ (parent-index node) 1)))) + (if (pair? siblings) + (car siblings) + (next-node parent))))))) + +(define-method prev-node ((node )) + (let ((parent (parent-node node)) + (index (parent-index node))) + (let ((siblings (node-children parent))) + (if (and (pair? siblings) (fix:> index 0)) + (let loop ((node (stream-ref siblings (fix:- index 1)))) + (let ((children (node-children node))) + (if (pair? children) + (loop (car (stream-last-pair children))) + node))) + parent)))) + +(define (nodify items node) + (stream-map (lambda (item index) + (nodify-item attr node index)) + items + (let iota ((n 0)) + (cons-stream n (iota (fix:+ n 1)))))) + +(define (keep-matching predicate items) + (let loop ((items items)) + (if (pair? items) + (if (predicate (car items)) + (cons-stream (car items) (loop (cdr items))) + (loop (cdr items))) + '()))) -(define-method node-eq? ((n1 ) (n2 )) - (and (call-next-method n1 n2) - (node-eq? (parent-node n1) (parent-node n2)))) +(define (delete-matching predicate items) + (let loop ((items items)) + (if (pair? items) + (if (predicate (car items)) + (loop (cdr items)) + (cons-stream (car items) (loop (cdr items)))) + '()))) -(define-method node-eq? ((n1 ) (n2 )) - (and (call-next-method n1 n2) - (fix:= (parent-index n1) (parent-index n2)))) +(define (expand-name qname node) + (let ((prefix (xml-name-prefix qname))) + (if (null-xml-name-prefix? prefix) + qname + (make-xml-name qname (expand-prefix prefix node))))) + +(define (expand-prefix prefix node) + (cond ((eq? prefix 'xml) xml-iri) + ((eq? prefix 'xmlns) xmlns-iri) + (else + (let loop ((node node)) + (or (xml-element-namespace-iri (node-item node) prefix) + (let ((parent (parent-node node))) + (if (not (element-node? parent)) + (error:bad-range-argument prefix 'EXPAND-PREFIX)) + (loop parent))))))) (define-class () - (child-items accessor node-child-items)) + (children accessor node-children)) (define make-root-node - (let ((c (instance-constructor '(item child-items)))) + (let ((make-node (instance-constructor '(item))) + (set-node-children! (slot-modifier 'children))) (lambda (doc) - (c doc - (let ((p - (lambda (item) - (or (xml-comment? item) - (xml-processing-instructions? item))))) - (append! (keep-matching-items (xml-document-misc-1 doc) p) - (keep-matching-items (xml-document-misc-2 doc) p) - (list (xml-document-root doc)) - (keep-matching-items (xml-document-misc-3 doc) p))))))) + (let ((node (make-node doc))) + (set-node-children! + node + (nodify (let ((p + (lambda (item) + (or (xml-comment? item) + (xml-processing-instructions? item))))) + (stream-append (keep-matching p (xml-document-misc-1 doc)) + (keep-matching p (xml-document-misc-2 doc)) + (stream (xml-document-root doc)) + (keep-matching p (xml-document-misc-3 doc)))) + node)) + node)))) (define-method node-string ((node )) - (node-string - (let ((doc (node-item node))) - (nodify-item (xml-document-root doc) - node - (fix:+ (length (xml-document-misc-1 doc)) - (length (xml-document-misc-2 doc))))))) - - -(define-class ( )) - -(content-node-nodifier ) + (let loop ((children (node-children node))) + (if (element-node? (car children)) + (node-string (car children)) + (loop (force (cdr children)))))) + +(define-class ( ) + (children accessor node-children) + (attrs accessor node-attrs) + (ns-decls accessor node-ns-decls)) + +(let ((make-node (instance-constructor '(item parent index))) + (set-node-children! (slot-modifier 'children)) + (set-node-attrs! (slot-modifier 'attrs)) + (set-node-ns-decls! (slot-modifier 'ns-decls))) + (define-method nodify-item ((item ) parent index) + (let ((node (make-node item parent index))) + (set-node-children! node + (nodify (list->stream (xml-element-content item)) + node)) + (set-node-attrs! + node + (stream-map (lambda (attr) + (make-attribute-node attr node)) + (delete-matching xml-attribute-namespace-decl? + (xml-element-attributes item)))) + (set-node-ns-decls! + node + (stream-map (lambda (attr) + (make-namespace-node attr node)) + (keep-matching xml-attribute-namespace-decl? + (xml-element-attributes item)))) + node))) (define-method node-name ((node )) (xml-element-name (node-item node))) -(define-method node-child-items ((node )) - (xml-element-content (node-item node))) - (define-method node-string ((node )) (call-with-output-string (lambda (port) - (let loop ((elt (node-item node))) - (for-each (lambda (item) - (cond ((string? item) (write-string item port)) - ((xml-element? item) (loop item)))) - (xml-element-contents elt)))))) + (let loop ((node node)) + (stream-for-each (lambda (child) + (cond ((text-node? child) + (write-string (node-string child) port)) + ((element-node? child) + (loop child)))) + (node-children node)))))) (define-class ()) (define-class ()) @@ -174,6 +217,12 @@ USA. (null-xml-name-prefix) (xml-name-local name)))) +(define-method nodify-item ((attr ) parent index) + index + (if (xml-attribute-namespace-decl? attr) + (make-namespace-node attr parent) + (make-attribute-node attr parent))) + (define-class ()) @@ -202,201 +251,150 @@ USA. (define-method node-string ((node )) (node-item node)) -(define (node-n-children node) - (length (node-child-items node))) - -(define ((ped node-test size env) node pos) - (node-test node pos size env) - (fix:+ pos 1)) - -(define ((axis:self node-test) node env) - (node-test node 1 (delay 1) env)) - -(define ((axis:child node-test) node env) - (walk-children+ node - -1 - (ped node-test - (delay (node-n-children parent)) - env) - 1)) - -(define ((axis:following-sibling node-test) node env) - (let ((parent (parent-node node)) - (index (parent-index node))) - (walk-children+ parent - index - (ped node-test - (delay (fix:- (node-n-children parent) - (fix:+ index 1))) - env) - 1))) - -(define ((axis:preceding-sibling node-test) node env) - (let ((parent (parent-node node)) - (index (parent-index node))) - (walk-children- parent - index - (ped node-test (delay index) env) - 1))) - -(define ((axis:descendant node-test) node env) - (let ((p (ped node-test (delay (count-descendants node 0)) env))) - (let loop ((node node) (pos 1)) - (walk-children+ node - -1 - (lambda (node pos) - (loop node (p node pos))) - pos)))) - -(define ((axis:descendant-or-self node-test) node env) - (let ((p (ped node-test (delay (count-descendants node 1)) env))) - (let loop ((node node) (pos 1)) - (walk-children+ node - -1 - loop - (p node pos))))) - -(define (count-descendants node offset) - (let loop ((node node) (size offset)) - (walk-children+ node - -1 - (lambda (node size) - (loop node (fix:+ size 1))) - size))) - -(define ((axis:following node-test) node env) - ) - -(define ((axis:preceding node-test) node env) - ) - -(define ((axis:parent node-test) node env) - (let ((parent (node-parent node))) - (if parent - (node-test parent 1 (delay 1) env)))) - -(define ((axis:ancestor node-test) node env) - (let ((size (delay (count-ancestors node 0)))) - (let loop ((node node) (pos 1)) - (let ((parent (node-parent node))) - (if parent - (begin - (node-test parent pos size env) - (loop parent (fix:+ pos 1)))))))) - -(define ((axis:ancestor-or-self node-test) node env) - (let ((size (delay (count-ancestors node 1)))) - (let loop ((node node) (pos 1)) - (node-test node pos size env) - (let ((parent (node-parent node))) - (if parent - (loop parent (fix:+ pos 1))))))) - -(define (count-ancestors node offset) - (let loop ((node node) (size offset)) - (let ((parent (node-parent node))) - (if parent - (loop parent (fix:+ size 1)) - size)))) +;;;; Axes + +(define (axis:self node) + (cons-stream node '())) + +(define (axis:child node) + (node-children node)) + +(define (axis:descendant node) + (stream-append-map axis:descendant-or-self (axis:child node))) + +(define (axis:descendant-or-self node) + (cons-stream node (axis:descendant node))) + +(define (axis:parent node) + (if (parented-node? node) + (cons-stream (parent-node node) '()) + '())) + +(define (axis:ancestor node) + (if (parented-node? node) + (axis:ancestor-or-self (parent-node node)) + '())) + +(define (axis:ancestor-or-self node) + (cons-stream node (axis:ancestor node))) + +(define (axis:following-sibling node) + (if (content-node? node) + (stream-tail (node-children (parent-node node)) + (fix:+ (parent-index node) 1)) + '())) + +(define (axis:preceding-sibling node) + (if (content-node? node) + (let ((children (node-children (parent-node node)))) + (let loop ((index (parent-index node))) + (if (fix:> index 0) + (let ((index (fix:- index 1))) + (cons-stream (stream-ref children index) + (loop index))) + '()))) + '())) + +(define (axis:following node) + (let ((next (next-node node))) + (if next + (cons-stream next (axis:following next)) + '()))) + +(define (axis:preceding node) + (let ((prev (prev-node node))) + (if prev + (cons-stream prev (axis:preceding prev)) + '()))) + +(define (axis:attribute node) + (node-attrs node)) + +(define (axis:namespace node) + (let per-node ((node node) (seen '())) + (let per-decl ((decls (node-ns-decls node)) (seen seen)) + (if (pair? decls) + (let ((decl (car decls))) + (let ((qname (xml-name-qname (xml-attribute-name decl)))) + (if (memq qname seen) + (per-decl (force (cdr decls)) seen) + (cons-stream decl + (per-decl (force (cdr decls)) + (cons qname seen)))))) + (let ((parent (parent-node node))) + (if parent + (per-node parent seen) + '())))))) -(define ((axis:attribute node-test) node env) - (if (element-node? node) - (let ((attrs (xml-element-attributes node))) - (let ((p (ped node-test (delay (count-attributes attrs)) env))) - (do ((attrs attrs (cdr attrs)) - (pos 1 - (let ((attr (car attrs))) - (if (xml-attribute-namespace-decl? attr) - pos - (p (make-attribute-node attr node) pos))))) - ((not (pair? attrs)))))))) - -(define (count-attributes attrs) - (do ((attrs attrs (cdr attrs)) - (n 0 - (if (xml-attribute-namespace-decl? (car attrs)) - n - (fix:+ n 1)))) - ((not (pair? attrs)) n))) - -(define ((axis:namespace node-test) node env) - (if (element-node? node) - (let ((p (ped node-test (delay (count-namespace-decls node)) env)) - (seen (make-namespace-hash-table))) - (do ((node node (parent-node node)) - (pos 1 - (do ((attrs (xml-element-attributes (node-item node)) - (cdr attrs)) - (pos pos - (let ((attr (car attrs))) - (if (namespace-unseen? seen attr) - (p (make-namespace-node attr node) pos) - pos)))) - ((not (pair? attrs)) pos)))) - ((not (xml-element? node))))))) - -(define (count-namespace-decls node) - (let ((seen (make-namespace-hash-table))) - (do ((node node (parent-node node)) - (n 0 - (do ((attrs (xml-element-attributes (node-item node)) (cdr attrs)) - (n n - (if (namespace-unseen? seen (car attrs)) - (fix:+ n 1) - n))) - ((not (pair? attrs)))))) - ((not (element-node? node)) n)))) - -(define (make-namespace-hash-table) - (make-eq-hash-table)) - -(define (namespace-unseen? table attr) - (if (xml-attribute-namespace-decl? attr) - (let ((qname (xml-name-qname (xml-attribute-name attr)))) - (if (hash-table/get table qname #f) - #f - (begin - (hash-table/put! table qname #t) - #t))) - #f)) - -(define (node-test:* type predicate) +;;;; Node tests + +(define (node-test:* type) + (case type + ((element) (simple-node-test element-node?)) + ((attribute namespace) all-node-test) + (else (error:bad-range-argument type 'NODE-TEST:*)))) + +(define ((node-test:name qname) type) (case type - ((attribute namespace) - predicate) ((element) - (lambda (node pos size env) - (if (element-node? node) - (predicate node pos size env)))) - (else - (error:bad-range-argument type 'NODE-TEST:*)))) - -(define (node-test:name qname) - (lambda (type predicate) - (case type - ((attribute) - (lambda (node pos size env) - (if (xml-name=? (node-name node) (expand-name qname node)) - (predicate node pos size env)))) - ((namespace) - (lambda (node pos size env) - (if (xml-name=? (node-name node) qname) - (predicate node pos size env)))) - ((element) - (lambda (node pos size env) - (if (and (element-node? node) - (xml-name=? (node-name node) (expand-name qname node))) - (predicate node pos size env)))) - (else - (error:bad-range-argument type 'NODE-TEST:NAME))))) + (simple-node-test + (lambda (node) + (and (element-node? node) + (xml-name=? (node-name node) (expand-name qname node)))))) + ((attribute) + (simple-node-test + (lambda (node) + (xml-name=? (node-name node) (expand-name qname (node-parent node)))))) + ((namespace) + (simple-node-test + (lambda (node) + (xml-name=? (node-name node) qname)))) + (else (error:bad-range-argument type 'NODE-TEST:NAME)))) + +(define ((node-test:prefix:* prefix) type) + (case type + ((element) + (simple-node-test + (lambda (node) + (and (element-node? node) + (xml-name-iri=? (node-name node) (expand-prefix prefix node)))))) + ((attribute) + (simple-node-test + (lambda (node) + (xml-name-iri=? (node-name node) + (expand-prefix prefix (node-parent node)))))) + ((namespace) null-node-test) + (else (error:bad-range-argument type 'NODE-TEST:PREFIX:*)))) + +(define (node-test:text type) + (case type + ((element) (simple-node-test text-node?)) + ((attribute namespace) null-node-test) + (else (error:bad-range-argument type 'NODE-TEST:TEXT)))) -(define (expand-name qname node) - (let ((prefix (xml-name-prefix qname))) - (if (null-xml-name-prefix? prefix) - qname - (make-xml-name qname - (let loop ((node node)) - (if (element-node? node) - (or (xml-element-namespace-iri (node-item node) - prefix) - (loop (parent-node node))))))))) +(define (node-test:comment type) + (case type + ((element) (simple-node-test comment-node?)) + ((attribute namespace) null-node-test) + (else (error:bad-range-argument type 'NODE-TEST:COMMENT)))) + +(define ((node-test:processing-instruction name) type) + (case type + ((element) + (simple-node-test + (if name + (lambda (node) + (and (processing-instructions-node? node) + (eq? (node-name node) name))) + processing-instructions-node?))) + ((attribute namespace) null-node-test) + (else (error:bad-range-argument type 'NODE-TEST:PROCESSING-INSTRUCTION)))) + +(define (node-test:node type) + (case type + ((element attribute namespace) all-node-test) + (else (error:bad-range-argument type 'NODE-TEST:NODE)))) + +(define (null-node-test nodes) nodes '()) +(define (all-node-test nodes) nodes) +(define ((simple-node-test predicate) nodes) (stream-filter predicate nodes)) \ No newline at end of file