From fdac71c8382601b5876460d4ffdc63fe70ea7285 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Tue, 24 Apr 2018 22:56:12 -0700 Subject: [PATCH] Rewrite the object hashing facility. * Names have been changed to avoid conflicts with hash tables, and for consistency. The old names are still available as deprecated renames. * New implementation is much easier to read. --- doc/ref-manual/associations.texi | 104 ++++++----------- src/compiler/base/object.scm | 2 +- src/compiler/rtlopt/rcse2.scm | 2 +- src/compiler/rtlopt/rdebug.scm | 2 +- src/edwin/evlcom.scm | 2 +- src/edwin/regcom.scm | 4 +- src/edwin/regexp.scm | 6 +- src/edwin/struct.scm | 2 +- src/runtime/boot.scm | 4 +- src/runtime/hash.scm | 188 ++++++++++++------------------- src/runtime/parser.scm | 2 +- src/runtime/prop2d.scm | 30 ++--- src/runtime/rep.scm | 2 +- src/runtime/runtime.pkg | 20 ++-- src/runtime/swank.scm | 6 +- src/runtime/unpars.scm | 2 +- src/runtime/world-report.scm | 2 +- src/sos/printer.scm | 2 +- src/xml/rdf-struct.scm | 4 +- 19 files changed, 161 insertions(+), 225 deletions(-) diff --git a/doc/ref-manual/associations.texi b/doc/ref-manual/associations.texi index c84fb5da9..e1b5ac4b2 100644 --- a/doc/ref-manual/associations.texi +++ b/doc/ref-manual/associations.texi @@ -1032,31 +1032,31 @@ but it may be used for other things as well. In particular, it is used in the generation of the written representation for many objects (@pxref{Custom Output}). -All of these procedures accept an optional argument called @var{table}; -this table contains the object-integer associations. If given, this -argument must be an object-hash table as constructed by -@code{hash-table/make} (see below). If not given, a default table is -used. +All of these procedures accept an optional argument called +@var{hasher} which contains the object-integer associations. If +given, this argument must be an object hasher as constructed by +@code{make-object-hasher} (see below). If not given, a default table +is used. -@deffn procedure hash object [table] +@deffn procedure hash-object object [hasher] @findex eq? -@code{hash} associates an exact non-negative integer with @var{object} -and returns that integer. If @code{hash} was previously called with -@var{object} as its argument, the integer returned is the same as was -returned by the previous call. @code{hash} guarantees that distinct -objects (in the sense of @code{eq?}) are associated with distinct -integers. -@end deffn - -@deffn procedure unhash k [table] -@code{unhash} takes an exact non-negative integer @var{k} and returns -the object associated with that integer. If there is no object -associated with @var{k}, or if the object previously associated with -@var{k} has been reclaimed by the garbage collector, an error of type -@code{condition-type:bad-range-argument} is signalled. In other words, -if @code{hash} previously returned @var{k} for some object, and that -object has not been reclaimed, it is the value of the call to -@code{unhash}. +@code{hash-object} associates an exact non-negative integer with +@var{object} and returns that integer. If @code{hash-object} was +previously called with @var{object} as its argument, the integer +returned is the same as was returned by the previous call. +@code{hash-object} guarantees that distinct objects (in the sense of +@code{eqv?}) are associated with distinct integers. +@end deffn + +@deffn procedure unhash-object k [hasher] +@code{unhash-object} takes an exact non-negative integer @var{k} and +returns the object associated with that integer. If there is no +object associated with @var{k}, or if the object previously associated +with @var{k} has been reclaimed by the garbage collector, an error of +type @code{condition-type:bad-range-argument} is signalled. In other +words, if @code{hash-object} previously returned @var{k} for some +object, and that object has not been reclaimed, it is the value of the +call to @code{unhash-object}. @findex condition-type:bad-range-argument @end deffn @@ -1069,60 +1069,30 @@ reclaimed) object will signal an error. @example @group (define x (cons 0 0)) @result{} @r{unspecified} -(hash x) @result{} 77 -(eqv? (hash x) (hash x)) @result{} #t +(hash-object x) @result{} 77 +(eqv? (hash-object x) + (hash-object x)) @result{} #t (define x 0) @result{} @r{unspecified} (gc-flip) @r{;force a garbage collection} -(unhash 77) @error{} +(unhash-object 77) @error{} @end group @end example -@deffn procedure object-hashed? object [table] -This predicate is true if @var{object} has an associated hash number. -Otherwise it is false. +@deffn procedure object-hashed? object [hasher] +This predicate is true iff @var{object} has an associated hash number. @end deffn -@deffn procedure valid-hash-number? k [table] -This predicate is true if @var{k} is the hash number associated with -some object. Otherwise it is false. +@deffn procedure valid-object-hash? k [hasher] +This predicate is true iff @var{k} is the hash number associated with +some object. @end deffn -The following two procedures provide a lower-level interface to the -object-hashing mechanism. +Finally, this procedure makes new object hashers: -@deffn procedure object-hash object [table [insert?]] -@findex eq? -@code{object-hash} is like @code{hash}, except that it accepts an -additional optional argument, @var{insert?}. If @var{insert?}@: is -supplied and is @code{#f}, @code{object-hash} will return an integer for -@var{object} only if there is already an association in the table; -otherwise, it will return @code{#f}. If @var{insert?} is not supplied, -or is not @code{#f}, @code{object-hash} always returns an integer, -creating an association in the table if necessary. - -@code{object-hash} additionally treats @code{#f} differently than does -@code{hash}. Calling @code{object-hash} with @code{#f} as its argument -will return an integer that, when passed to @code{unhash}, will signal -an error rather than returning @code{#f}. Likewise, -@code{valid-hash-number?} will return @code{#f} for this integer. -@end deffn - -@deffn procedure object-unhash k [table] -@code{object-unhash} is like @code{unhash}, except that when @var{k} is -not associated with any object or was previously associated with an -object that has been reclaimed, @code{object-unhash} returns @code{#f}. -This means that there is an ambiguity in the value returned by -@code{object-unhash}: if @code{#f} is returned, there is no way to -tell if @var{k} is associated with @code{#f} or is not associated with -any object at all. -@end deffn - -Finally, this procedure makes new object-hash tables: - -@deffn procedure hash-table/make -This procedure creates and returns a new, empty object-hash table that -is suitable for use as the optional @var{table} argument to the above -procedures. The returned table contains no associations. +@deffn procedure make-object-hasher +This procedure creates and returns a new, empty object hasher that +is suitable for use as the optional @var{hasher} argument to the above +procedures. The returned hasher contains no associations. @end deffn @node Red-Black Trees, Weight-Balanced Trees, Object Hashing, Associations diff --git a/src/compiler/base/object.scm b/src/compiler/base/object.scm index 2ae868bd9..8712258c2 100644 --- a/src/compiler/base/object.scm +++ b/src/compiler/base/object.scm @@ -119,7 +119,7 @@ USA. (define (->tagged-vector object) (let ((object (if (exact-nonnegative-integer? object) - (unhash object) + (unhash-object object) object))) (and (or (tagged-vector? object) (named-structure? object)) diff --git a/src/compiler/rtlopt/rcse2.scm b/src/compiler/rtlopt/rcse2.scm index b2a3e0c41..4642e385c 100644 --- a/src/compiler/rtlopt/rcse2.scm +++ b/src/compiler/rtlopt/rcse2.scm @@ -129,7 +129,7 @@ USA. (cond ((integer? object) (inexact->exact object)) ((symbol? object) (symbol-hash object)) ((string? object) (string-hash object)) - (else (hash object)))))) + (else (hash-object object)))))) (let ((hash (loop expression))) (receiver (modulo hash (rcse-ht-size)) diff --git a/src/compiler/rtlopt/rdebug.scm b/src/compiler/rtlopt/rdebug.scm index 11590d746..9708b0b0e 100644 --- a/src/compiler/rtlopt/rdebug.scm +++ b/src/compiler/rtlopt/rdebug.scm @@ -50,7 +50,7 @@ USA. (write-string "; multiple blocks"))) (bblock (write-string "; block ") - (write (unhash bblock))) + (write (unhash-object bblock))) (else (write-string "; no block!")))))))))) diff --git a/src/edwin/evlcom.scm b/src/edwin/evlcom.scm index 83ef83d57..7634633ca 100644 --- a/src/edwin/evlcom.scm +++ b/src/edwin/evlcom.scm @@ -528,7 +528,7 @@ Set by Scheme evaluation code to update the mode line." (not (number? value))) (string-append " " - (write-to-string (object-hash value))) + (write-to-string (hash-object value))) "") ": "))) diff --git a/src/edwin/regcom.scm b/src/edwin/regcom.scm index 46cf655f1..c8117d399 100644 --- a/src/edwin/regcom.scm +++ b/src/edwin/regcom.scm @@ -175,7 +175,7 @@ With prefix arg, delete as well." '()) (define (make-buffer-position mark buffer) - (cons buffer-position-tag (cons mark (hash buffer)))) + (cons buffer-position-tag (cons mark (hash-object buffer)))) (define (buffer-position? object) (and (pair? object) @@ -188,4 +188,4 @@ With prefix arg, delete as well." (cadr position)) (define-integrable (buffer-position-buffer position) - (unhash (cddr position))) \ No newline at end of file + (unhash-object (cddr position))) \ No newline at end of file diff --git a/src/edwin/regexp.scm b/src/edwin/regexp.scm index d2dfebe3e..661c044b4 100644 --- a/src/edwin/regexp.scm +++ b/src/edwin/regexp.scm @@ -28,7 +28,7 @@ USA. (declare (usual-integrations)) -(define hash-of-false (object-hash #f)) +(define hash-of-false (hash-object #f)) (define match-group hash-of-false) (define (re-match-start i) @@ -48,13 +48,13 @@ USA. (re-match-end-index i))) (define (re-match-group) - (let ((group (object-unhash match-group))) + (let ((group (unhash-object match-group))) (if (not group) (error "No match group")) group)) (define (re-match-data) - (let ((group (object-unhash match-group))) + (let ((group (unhash-object match-group))) (cons group (if group (let ((v (make-vector 20))) diff --git a/src/edwin/struct.scm b/src/edwin/struct.scm index 5ba5e7623..2080c6bd4 100644 --- a/src/edwin/struct.scm +++ b/src/edwin/struct.scm @@ -192,7 +192,7 @@ USA. (define (group-hash-number group) (or (group-%hash-number group) - (let ((n (object-hash group))) + (let ((n (hash-object group))) (set-group-%hash-number! group n) n))) diff --git a/src/runtime/boot.scm b/src/runtime/boot.scm index 7ac9cd8fd..73a960fd8 100644 --- a/src/runtime/boot.scm +++ b/src/runtime/boot.scm @@ -324,7 +324,7 @@ USA. (if (get-param:unparse-with-maximum-readability?) (begin (write-string "#@" port) - (write (object-hash object) port)) + (write (hash-object object) port)) (procedure object port)))))) (define (bracketed-unparser-method procedure) @@ -342,7 +342,7 @@ USA. name) port) (write-char #\space port) - (write (object-hash object) port) + (write (hash-object object) port) (if procedure (procedure object port))))) (define (simple-unparser-method name get-parts) diff --git a/src/runtime/hash.scm b/src/runtime/hash.scm index ed0967372..7a64a4db0 100644 --- a/src/runtime/hash.scm +++ b/src/runtime/hash.scm @@ -27,122 +27,84 @@ USA. ;;;; Object Hashing ;;; package: (runtime hash) -(declare (usual-integrations)) - -;;;; Object hashing - ;;; How this works: -;;; There are two tables, the hash table and the unhash table: +;;; There are two tables: -;;; - The hash table associates objects to their hash numbers. The -;;; entries are keyed according to the address (datum) of the object. +;;; * The hash table associates objects (as compared by eqv?) to their hash +;;; numbers. -;;; - The unhash table associates the hash numbers with the -;;; corresponding objects. It is keyed according to the numbers -;;; themselves. +;;; * The unhash table associates the hash numbers back to the hashed objects. -;;; Both tables hold the objects weakly. Thus the hash table holds -;;; its keys weakly, and the unhash table holds its values weakly. - -(define default/hash-table-size 313) -(define default-hash-table) - -(define (initialize-package!) - (set! make-datum-weak-eq-hash-table - (hash-table-constructor - (make-hash-table-type eq-hash-mod eq? #f - hash-table-entry-type:datum-weak))) - (set! default-hash-table (hash-table/make))) - -(define-structure (hash-table - (conc-name hash-table/) - (constructor %hash-table/make)) - (mutex) - (next-number) - (hash-table) - (unhash-table)) - -(define make-datum-weak-eq-hash-table) - -(define (hash-table/make #!optional size) - (let ((size (if (default-object? size) - default/hash-table-size - size))) - (%hash-table/make - (make-thread-mutex) - 1 - (make-key-weak-eq-hash-table size) - (make-datum-weak-eq-hash-table size)))) - -(define (hash x #!optional table) - (if (eq? x #f) - 0 - (object-hash x - (if (default-object? table) default-hash-table table) - #t))) - -(define (unhash n #!optional table) - (if (= n 0) - #f - (let ((object - (object-unhash n - (if (default-object? table) - default-hash-table - table)))) - (if (not object) - (error:bad-range-argument n 'unhash)) - object))) - -(define (valid-hash-number? n #!optional table) - (or (= n 0) - (object-unhash n (if (default-object? table) default-hash-table table)))) - -(define (object-hashed? x #!optional table) - (or (eq? x #f) - (object-hash x - (if (default-object? table) default-hash-table table) - #f))) +;;; Both tables hold the objects weakly; the hash table holds its keys weakly, +;;; and the unhash table holds its values weakly. + +(declare (usual-integrations)) -(define (object-hash object #!optional table insert?) - (let ((table - (if (default-object? table) - default-hash-table - (begin - (if (not (hash-table? table)) - (error:wrong-type-argument table - "object-hash table" - 'object-hash)) - table))) - (insert? (or (default-object? insert?) insert?))) - (with-thread-mutex-lock (hash-table/mutex table) - (lambda () - (let ((number - (hash-table-ref/default (hash-table/hash-table table) - object - #f))) - (if (not number) - (if insert? - (let ((hashtb (hash-table/hash-table table)) - (unhashtb (hash-table/unhash-table table)) - (next (hash-table/next-number table))) - (set-hash-table/next-number! table (1+ next)) - (hash-table-set! unhashtb next object) - (hash-table-set! hashtb object next) - next) - number) - number)))))) - -(define (object-unhash number #!optional table) - (let ((table - (if (default-object? table) - default-hash-table - (begin - (if (not (hash-table? table)) - (error:wrong-type-argument table - "object-hash table" - 'object-unhash)) - table)))) - (with-thread-mutex-lock (hash-table/mutex table) - (lambda () - (hash-table-ref/default (hash-table/unhash-table table) number #f))))) \ No newline at end of file +(define (hash-object object #!optional hasher) + ((get-operation hasher 'hash-object) object)) + +(define (object-hashed? object #!optional hasher) + ((get-operation hasher 'object-hashed?) object)) + +(define (unhash-object hash #!optional hasher) + ((get-operation hasher 'unhash-object) hash)) + +(define (valid-object-hash? hash #!optional hasher) + ((get-operation hasher 'valid-object-hash?) hash)) + +(define (get-operation hasher operator) + ((if (default-object? hasher) + default-object-hasher + hasher) + operator)) + +(define-deferred default-object-hasher + (make-object-hasher 313)) + +(define (make-object-hasher #!optional initial-size) + (let ((mutex (make-thread-mutex)) + (next-hash 1) + (hash-table (make-key-weak-eqv-hash-table initial-size)) + (unhash-table (make-datum-weak-eqv-hash-table initial-size))) + + (define (hash-object object) + (if (eq? object #f) + 0 + (with-thread-mutex-lock mutex + (lambda () + (hash-table-intern! hash-table object + (lambda () + (let ((hash next-hash)) + (set! next-hash (+ next-hash 1)) + (hash-table-set! unhash-table hash object) + hash))))))) + + (define (object-hashed? object) + (or (eq? object #f) + (with-thread-mutex-lock mutex + (lambda () + (hash-table-exists? hash-table object))))) + + (define (unhash-object hash) + (guarantee exact-nonnegative-integer? hash 'unhash-object) + (if (= hash 0) + #f + (with-thread-mutex-lock mutex + (lambda () + (hash-table-ref unhash-table hash))))) + + (define (valid-object-hash? hash) + (guarantee exact-nonnegative-integer? hash 'valid-object-hash?) + (or (= hash 0) + (with-thread-mutex-lock mutex + (lambda () + (hash-table-exists? unhash-table hash))))) + + (lambda (operator) + (case operator + ((hash-object) hash-object) + ((object-hashed?) object-hashed?) + ((unhash-object) unhash-object) + ((valid-object-hash?) valid-object-hash?) + (else (error "Unknown operator:" operator)))))) \ No newline at end of file diff --git a/src/runtime/parser.scm b/src/runtime/parser.scm index d65c0409f..8b4963ce1 100644 --- a/src/runtime/parser.scm +++ b/src/runtime/parser.scm @@ -593,7 +593,7 @@ USA. (error:illegal-unhash object)) (if (eq? object 0) #f - (or (object-unhash object) + (or (unhash-object object) (error:undefined-hash object)))) (define (handler:quote db ctx char) diff --git a/src/runtime/prop2d.scm b/src/runtime/prop2d.scm index d33675df1..039e93dc7 100644 --- a/src/runtime/prop2d.scm +++ b/src/runtime/prop2d.scm @@ -38,8 +38,8 @@ USA. (define system-properties) (define (2d-put! x y value) - (let ((x-hash (object-hash x)) - (y-hash (object-hash y))) + (let ((x-hash (hash-object x)) + (y-hash (hash-object y))) (let ((bucket (assq x-hash system-properties))) (if bucket (let ((entry (assq y-hash (cdr bucket)))) @@ -55,9 +55,9 @@ USA. system-properties)))))) (define (2d-get x y) - (let ((bucket (assq (object-hash x) system-properties))) + (let ((bucket (assq (hash-object x) system-properties))) (and bucket - (let ((entry (assq (object-hash y) (cdr bucket)))) + (let ((entry (assq (hash-object y) (cdr bucket)))) (and entry (cdr entry)))))) @@ -65,14 +65,14 @@ USA. ;;; Removes the bucket if the entry removed was the only entry. (define (2d-remove! x y) - (let ((bucket (assq (object-hash x) system-properties))) + (let ((bucket (assq (hash-object x) system-properties))) (and bucket (begin (set-cdr! bucket - (del-assq! (object-hash y) + (del-assq! (hash-object y) (cdr bucket))) (if (null? (cdr bucket)) (set! system-properties - (del-assq! (object-hash x) + (del-assq! (hash-object x) system-properties))) true)))) @@ -83,36 +83,36 @@ USA. (set! system-properties (delete-invalid-hash-numbers! system-properties))) (define (filter-bucket! bucket) - (or (not (valid-hash-number? (car bucket))) + (or (not (valid-object-hash? (car bucket))) (begin (set-cdr! bucket (delete-invalid-y! (cdr bucket))) (null? (cdr bucket))))) (define (filter-entry! entry) - (not (valid-hash-number? (car entry)))) + (not (valid-object-hash? (car entry)))) (define delete-invalid-hash-numbers!) (define delete-invalid-y!) (define (2d-get-alist-x x) - (let ((bucket (assq (object-hash x) system-properties))) + (let ((bucket (assq (hash-object x) system-properties))) (if bucket (let loop ((rest (cdr bucket))) (cond ((null? rest) '()) - ((valid-hash-number? (caar rest)) - (cons (cons (object-unhash (caar rest)) + ((valid-object-hash? (caar rest)) + (cons (cons (unhash-object (caar rest)) (cdar rest)) (loop (cdr rest)))) (else (loop (cdr rest))))) '()))) (define (2d-get-alist-y y) - (let ((y-hash (object-hash y))) + (let ((y-hash (hash-object y))) (let loop ((rest system-properties)) (cond ((null? rest) '()) - ((valid-hash-number? (caar rest)) + ((valid-object-hash? (caar rest)) (let ((entry (assq y-hash (cdar rest)))) (if entry - (cons (cons (object-unhash (caar rest)) + (cons (cons (unhash-object (caar rest)) (cdr entry)) (loop (cdr rest))) (loop (cdr rest))))) diff --git a/src/runtime/rep.scm b/src/runtime/rep.scm index 63f2da82a..9699abc67 100644 --- a/src/runtime/rep.scm +++ b/src/runtime/rep.scm @@ -507,7 +507,7 @@ USA. (object-pointer? object) (not (interned-symbol? object)) (not (number? object)) - (object-hash object)) + (hash-object object)) environment)) (define (repl-eval/write s-expression #!optional environment repl) diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index ffac4dc59..1f779e62f 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -2358,15 +2358,19 @@ USA. (define-package (runtime hash) (files "hash") (parent (runtime)) - (export () - hash - hash-table/make - object-hash + (export () deprecated:hash + (hash hash-object) + (hash-table/make make-object-hasher) + (object-hash hash-object) + (object-unhash unhash-object) + (unhash unhash-object) + (valid-hash-number? valid-object-hash?)) + (export () + hash-object + make-object-hasher object-hashed? - object-unhash - unhash - valid-hash-number?) - (initialization (initialize-package!))) + unhash-object + valid-object-hash?)) (define-package (runtime hash-table) (files "hash-table") diff --git a/src/runtime/swank.scm b/src/runtime/swank.scm index 45dc1c87a..be1d11289 100644 --- a/src/runtime/swank.scm +++ b/src/runtime/swank.scm @@ -241,7 +241,7 @@ USA. (get-current-environment)) ((string-prefix? anonymous-package-prefix pstring) (let ((object - (object-unhash + (unhash-object (string->number (string-tail pstring (string-length anonymous-package-prefix)) @@ -257,7 +257,7 @@ USA. (let ((package (environment->package env))) (if package (write-to-string (package/name package)) - (string anonymous-package-prefix (object-hash env))))) + (string anonymous-package-prefix (hash-object env))))) (define anonymous-package-prefix "environment-") @@ -283,7 +283,7 @@ USA. (let ((value (repl-eval sexp socket))) (call-with-output-string (lambda (port) - (port/write-result port sexp value (object-hash value) (buffer-env)) + (port/write-result port sexp value (hash-object value) (buffer-env)) (if nl? (newline port)))))) (define (for-each-sexp procedure string) diff --git a/src/runtime/unpars.scm b/src/runtime/unpars.scm index 472d8fe80..e6ab9fc2c 100644 --- a/src/runtime/unpars.scm +++ b/src/runtime/unpars.scm @@ -344,7 +344,7 @@ USA. (*unparse-string (number->string number 16) context)) (define-integrable (*unparse-hash object context) - (*unparse-string (number->string (hash object)) context)) + (*unparse-string (number->string (hash-object object)) context)) (define (*unparse-readable-hash object context) (*unparse-string "#@" context) diff --git a/src/runtime/world-report.scm b/src/runtime/world-report.scm index 153b1b768..52b2519b0 100644 --- a/src/runtime/world-report.scm +++ b/src/runtime/world-report.scm @@ -170,7 +170,7 @@ USA. '())) flags)) (newline port))) - (sort (map (lambda (t) (cons (hash t) t)) (threads-list)) + (sort (map (lambda (t) (cons (hash-object t) t)) (threads-list)) (lambda (a b) (< (car a) (car b)))))) (define (write-state thread port) diff --git a/src/sos/printer.scm b/src/sos/printer.scm index 1c0548402..07a01af14 100644 --- a/src/sos/printer.scm +++ b/src/sos/printer.scm @@ -87,7 +87,7 @@ USA. (if object (begin (write-char #\space port) - (write (hash object) port))) + (write (hash-object object) port))) (if thunk (thunk)) (write-char #\] port)) diff --git a/src/xml/rdf-struct.scm b/src/xml/rdf-struct.scm index d31561b39..a5a5b3958 100644 --- a/src/xml/rdf-struct.scm +++ b/src/xml/rdf-struct.scm @@ -165,7 +165,7 @@ USA. (hash-table-intern! *rdf-bnode-registry* name %make-rdf-bnode)))) (define (rdf-bnode-name bnode) - (string-append "B" (number->string (hash bnode)))) + (string-append "B" (number->string (hash-object bnode)))) (define (%decode-bnode-uri uri) (let ((v @@ -173,7 +173,7 @@ USA. ((symbol? uri) (*parse-symbol parse-bnode uri)) (else #f)))) (and v - (unhash (vector-ref v 0))))) + (unhash-object (vector-ref v 0))))) (define parse-bnode (let ((digits (ascii-range->char-set #x30 #x3A))) -- 2.25.1