From c9dcc774cfb135717934b6c89ab23747af739725 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Wed, 2 Aug 2006 05:05:25 +0000 Subject: [PATCH] Clean up handling of bnode registration. Add mechanism for giving string "handles" to bnodes, so that in-memory bnodes can be uniquely identified. (Bnode names are scoped to a document and are unsuitable for this purpose.) --- v7/src/xml/rdf-nt.scm | 33 ++----- v7/src/xml/rdf-struct.scm | 199 ++++++++++++++++++++++++++------------ v7/src/xml/turtle.scm | 15 +-- v7/src/xml/xml.pkg | 10 +- 4 files changed, 162 insertions(+), 95 deletions(-) diff --git a/v7/src/xml/rdf-nt.scm b/v7/src/xml/rdf-nt.scm index dfed3cdbb..0c8054ed2 100644 --- a/v7/src/xml/rdf-nt.scm +++ b/v7/src/xml/rdf-nt.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: rdf-nt.scm,v 1.7 2006/06/22 19:17:26 cph Exp $ +$Id: rdf-nt.scm,v 1.8 2006/08/02 05:05:10 cph Exp $ Copyright 2006 Massachusetts Institute of Technology @@ -30,7 +30,7 @@ USA. ;;;; Decoder (define (read-rdf/nt-file pathname) - (fluid-let ((*bnodes* (make-bnode-table))) + (fluid-let ((*rdf-bnode-registry* (make-rdf-bnode-registry))) (call-with-input-file pathname (lambda (port) (let loop ((triples '())) @@ -41,20 +41,20 @@ USA. (define (rdf/nt-file->source pathname) (let ((port (open-input-file pathname)) - (table (make-bnode-table))) + (registry (make-rdf-bnode-registry))) (lambda () (let ((triple - (fluid-let ((*bnodes* table)) + (fluid-let ((*rdf-bnode-registry* registry)) (%read-rdf/nt port)))) (if (eof-object? triple) #f triple))))) (define (read-rdf/nt port) - (fluid-let ((*bnodes* (bnode-table port))) + (fluid-let ((*rdf-bnode-registry* (port/bnode-registry port))) (let ((triple (%read-rdf/nt port))) (if (eof-object? triple) - (drop-bnode-table port)) + (port/drop-bnode-registry port)) triple))) (define (%read-rdf/nt port) @@ -98,7 +98,7 @@ USA. (define parse-node-id (*parser - (encapsulate (lambda (v) (make-bnode (vector-ref v 0))) + (encapsulate (lambda (v) (make-rdf-bnode (vector-ref v 0))) (seq "_:" (match match-bnode-name))))) @@ -162,25 +162,6 @@ USA. (port/set-coding port 'UTF-8) (loop))) - -(define *bnodes*) - -(define (make-bnode-table) - (make-string-hash-table)) - -(define (bnode-table port) - (or (port/get-property port 'BNODE-TABLE #f) - (let ((table (make-string-hash-table))) - (port/set-property! port 'BNODE-TABLE table) - table))) - -(define (drop-bnode-table port) - (port/remove-property! port 'BNODE-TABLE)) - -(define (make-bnode name) - (hash-table/intern! *bnodes* name - (lambda () - (make-rdf-bnode name)))) (define match-ws* (*matcher (* (char-set char-set:ws)))) diff --git a/v7/src/xml/rdf-struct.scm b/v7/src/xml/rdf-struct.scm index 48c5f5598..5f8e90462 100644 --- a/v7/src/xml/rdf-struct.scm +++ b/v7/src/xml/rdf-struct.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: rdf-struct.scm,v 1.15 2006/08/01 17:23:49 cph Exp $ +$Id: rdf-struct.scm,v 1.16 2006/08/02 05:05:14 cph Exp $ Copyright 2006 Massachusetts Institute of Technology @@ -41,6 +41,32 @@ USA. (canonicalize-rdf-predicate predicate 'MAKE-RDF-TRIPLE) (canonicalize-rdf-object object 'MAKE-RDF-TRIPLE))) +(define (canonicalize-rdf-subject subject #!optional caller) + (cond ((rdf-bnode? subject) subject) + ((%decode-bnode-uri subject caller)) + (else (canonicalize-rdf-uri subject caller)))) + +(define (canonicalize-rdf-predicate predicate #!optional caller) + (canonicalize-rdf-uri predicate caller)) + +(define (canonicalize-rdf-object object #!optional caller) + (cond ((rdf-literal? object) object) + ((string? object) (make-rdf-literal object #f)) + (else (canonicalize-rdf-subject object caller)))) + +(define (canonicalize-rdf-uri uri #!optional caller) + (if (rdf-qname? uri) + (rdf-qname->uri uri) + (->absolute-uri uri caller))) + +(define (write-rdf-uri uri port) + (let ((qname (uri->rdf-qname uri #f))) + (if qname + (write-string (symbol-name qname) port) + (write-rdf-uri-ref uri port)))) + +;;;; Blank nodes + (define-record-type (%make-rdf-bnode name) rdf-bnode? @@ -55,17 +81,109 @@ USA. (write-rdf-bnode bnode port)))) (define (make-rdf-bnode #!optional name) - (%make-rdf-bnode - (cond ((default-object? name) - (generate-bnode-name)) - ((and (string? name) - (complete-match match-bnode-name name)) - name) - (else - (error:wrong-type-argument name "RDF bnode name" 'RDF-BNODE))))) + (cond ((default-object? name) + (let ((name (generate-bnode-name))) + (let ((bnode (%make-rdf-bnode name))) + (hash-table/put! *rdf-bnode-registry* name bnode) + bnode))) + ((and (string? name) + (complete-match match-bnode-name name)) + (hash-table/intern! *rdf-bnode-registry* name + (lambda () + (%make-rdf-bnode name)))) + (else + (error:wrong-type-argument name "RDF bnode name" 'RDF-BNODE)))) (define (generate-bnode-name) - (string-append "B" (vector-8b->hexadecimal (random-byte-vector 8)))) + (let loop () + (let ((name + (string-append "B" + (vector-8b->hexadecimal (random-byte-vector 8))))) + (if (hash-table/get *rdf-bnode-registry* name #f) + (loop) + name)))) + +(define (%decode-bnode-uri uri caller) + (let ((lookup + (lambda (uri) + (let ((bnode + (hash-table/get *rdf-bnode-registry* + (string-tail uri 2) + #f))) + (if (not bnode) + (error:bad-range-argument uri caller)) + bnode)))) + (let ((handle-uri + (lambda (uri) + (cond ((complete-match match-bnode-uri uri) (lookup uri)) + ((handle->rdf-bnode uri #f)) + (else #f))))) + (cond ((string? uri) (handle-uri uri)) + ((symbol? uri) (handle-uri (symbol-name uri))) + (else #f))))) + +(define (complete-match matcher string) + (let ((buffer (string->parser-buffer string))) + (and (matcher buffer) + (not (peek-parser-buffer-char buffer))))) + +(define match-bnode-uri + (*matcher (seq "_:" match-bnode-name))) + +(define match-bnode-name + (let* ((name-head + (char-set-union (ascii-range->char-set #x41 #x5B) + (ascii-range->char-set #x61 #x7B))) + (name-tail + (char-set-union name-head + (ascii-range->char-set #x30 #x3A)))) + (*matcher + (seq (char-set name-head) + (* (char-set name-tail)))))) + +(define (make-rdf-bnode-registry) + (make-string-hash-table)) + +(define *rdf-bnode-registry* + (make-rdf-bnode-registry)) + +(define (port/bnode-registry port) + (or (port/get-property port 'PORT/BNODE-REGISTRY #f) + (let ((table (make-string-hash-table))) + (port/set-property! port 'PORT/BNODE-REGISTRY table) + table))) + +(define (port/drop-bnode-registry port) + (port/remove-property! port 'PORT/DROP-BNODE-REGISTRY)) + +(define (rdf-bnode->handle bnode) + (string-append bnode-handle-prefix (number->string (hash bnode)))) + +(define (handle->rdf-bnode handle #!optional caller) + (let ((v + (and (string? handle) + (parse-bnode-handle (string->parser-buffer handle))))) + (if v + (unhash (vector-ref v 0)) + (begin + (if caller + (error:wrong-type-argument handle "RDF bnode handle" caller)) + #f)))) + +(define parse-bnode-handle + (let ((prefix + (lambda (b) (match-parser-buffer-string b bnode-handle-prefix))) + (digits (ascii-range->char-set #x30 #x3A))) + (*parser + (seq (noise prefix) + (map (lambda (s) (string->number s 10 #t)) + (match (+ (char-set digits)))) + (noise (end-of-input)))))) + +(define bnode-handle-prefix + "_bnode_:") + +;;;; Literals (define-record-type (%make-rdf-literal text type) @@ -91,6 +209,15 @@ USA. type (->absolute-uri type 'RDF-LITERAL)))) +(define match-language + (let* ((language-head (ascii-range->char-set #x61 #x7B)) + (language-tail + (char-set-union language-head + (ascii-range->char-set #x30 #x3A)))) + (*matcher + (seq (+ (char-set language-head)) + (* (seq #\- (+ (char-set language-tail)))))))) + (define (rdf-literal-type literal) (let ((type (%rdf-literal-type literal))) (and (absolute-uri? type) @@ -105,6 +232,8 @@ USA. (and (string=? (rdf-literal-text l1) (rdf-literal-text l2)) (eq? (%rdf-literal-type l1) (%rdf-literal-type l2)))) +;;;; Triples index (deprecated) + (define-record-type (%make-rdf-index subjects predicates objects) rdf-index? @@ -133,56 +262,6 @@ USA. (let ((o (rdf-triple-object triple))) (if (not (rdf-literal? o)) (add o (rdf-index-objects index)))))) - -(define (canonicalize-rdf-uri uri #!optional caller) - (or (rdf-qname->uri uri #f) - (->absolute-uri uri caller))) - -(define (write-rdf-uri uri port) - (let ((qname (uri->rdf-qname uri #f))) - (if qname - (write-string (symbol-name qname) port) - (write-rdf-uri-ref uri port)))) - -(define (canonicalize-rdf-subject subject #!optional caller) - (if (rdf-bnode? subject) - subject - (canonicalize-rdf-uri subject caller))) - -(define (canonicalize-rdf-predicate predicate #!optional caller) - (canonicalize-rdf-uri predicate caller)) - -(define (canonicalize-rdf-object object #!optional caller) - (cond ((or (rdf-bnode? object) - (rdf-literal? object)) - object) - ((string? object) (make-rdf-literal object #f)) - (else (canonicalize-rdf-uri object caller)))) - -(define match-bnode-name - (let* ((name-head - (char-set-union (ascii-range->char-set #x41 #x5B) - (ascii-range->char-set #x61 #x7B))) - (name-tail - (char-set-union name-head - (ascii-range->char-set #x30 #x3A)))) - (*matcher - (seq (char-set name-head) - (* (char-set name-tail)))))) - -(define match-language - (let* ((language-head (ascii-range->char-set #x61 #x7B)) - (language-tail - (char-set-union language-head - (ascii-range->char-set #x30 #x3A)))) - (*matcher - (seq (+ (char-set language-head)) - (* (seq #\- (+ (char-set language-tail)))))))) - -(define (complete-match matcher string) - (let ((buffer (string->parser-buffer string))) - (and (matcher buffer) - (not (peek-parser-buffer-char buffer))))) ;;;; Qnames diff --git a/v7/src/xml/turtle.scm b/v7/src/xml/turtle.scm index b42920f2f..f5911b36c 100644 --- a/v7/src/xml/turtle.scm +++ b/v7/src/xml/turtle.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: turtle.scm,v 1.6 2006/07/29 01:25:58 cph Exp $ +$Id: turtle.scm,v 1.7 2006/08/02 05:05:20 cph Exp $ Copyright 2006 Massachusetts Institute of Technology @@ -31,12 +31,13 @@ USA. (let ((pathname (pathname-default-type pathname "ttl"))) (call-with-input-file pathname (lambda (port) - (post-process-parser-output - (parse-turtle-doc (input-port->parser-buffer port)) - (if (default-object? base-uri) - (pathname->uri (merge-pathnames pathname)) - (merge-uris (file-namestring pathname) - (->absolute-uri base-uri 'read-turtle-file)))))))) + (fluid-let ((*rdf-bnode-registry* (make-rdf-bnode-registry))) + (post-process-parser-output + (parse-turtle-doc (input-port->parser-buffer port)) + (if (default-object? base-uri) + (pathname->uri (merge-pathnames pathname)) + (merge-uris (file-namestring pathname) + (->absolute-uri base-uri 'read-turtle-file))))))))) (define (parse-turtle-doc buffer) (parse:ws* buffer) diff --git a/v7/src/xml/xml.pkg b/v7/src/xml/xml.pkg index 65dab5d01..711135630 100644 --- a/v7/src/xml/xml.pkg +++ b/v7/src/xml/xml.pkg @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: xml.pkg,v 1.79 2006/08/01 17:23:50 cph Exp $ +$Id: xml.pkg,v 1.80 2006/08/02 05:05:25 cph Exp $ Copyright 2001,2002,2003,2004,2005,2006 Massachusetts Institute of Technology @@ -527,6 +527,7 @@ USA. (parent (runtime rdf)) (export () *default-rdf-prefix-registry* + *rdf-bnode-registry* @@ -552,12 +553,15 @@ USA. guarantee-rdf-prefix-registry guarantee-rdf-qname guarantee-rdf-triple + handle->rdf-bnode + make-rdf-bnode-registry make-rdf-bnode make-rdf-index make-rdf-literal make-rdf-qname make-rdf-triple new-rdf-prefix-registry + rdf-bnode->handle rdf-bnode-name rdf-bnode? rdf-index-objects @@ -588,7 +592,9 @@ USA. (export (runtime rdf) %make-rdf-literal match-bnode-name - match-language)) + match-language + port/bnode-registry + port/drop-bnode-registry)) (define-package (runtime rdf nt) (files "rdf-nt") -- 2.25.1