From 056bfc369b57df0864fd88f40ce26cd0e5da75ed Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Wed, 2 Dec 2009 20:53:54 -0500 Subject: [PATCH] Use weak pairs for some entries in obarray buckets. Entries for symbols with global bindings still use ordinary pairs. This enables interned symbols without global bindings to be garbage- collected. Operations in lookup.c that create or destroy global bindings update the relevant obarray bucket entry to strengthen or weaken it. In the process, fix intern.c's string_hash to accept a long string length, rather than a uint32_t string length, so that strings longer than 4 GB will not lose on 64-bit systems. --- src/edwin/schmod.scm | 60 +++++++++++++------------------ src/microcode/extern.h | 2 ++ src/microcode/intern.c | 72 +++++++++++++++++++++++++++++++------ src/microcode/lookup.c | 16 +++++++-- src/runtime/global.scm | 79 ++++++++++++++++++++++++++++++++--------- src/runtime/runtime.pkg | 1 + src/runtime/uenvir.scm | 35 +++++++----------- 7 files changed, 179 insertions(+), 86 deletions(-) diff --git a/src/edwin/schmod.scm b/src/edwin/schmod.scm index baada0a5e..0aa367d45 100644 --- a/src/edwin/schmod.scm +++ b/src/edwin/schmod.scm @@ -229,51 +229,41 @@ The following commands evaluate Scheme expressions: (lambda (prefix if-unique if-not-unique if-not-found) (let ((completions (let ((environment (evaluation-environment #f))) - (let ((completions - (obarray-completions - (if (and bound-only? - (environment-lookup - environment - '*PARSER-CANONICALIZE-SYMBOLS?*)) - (string-downcase prefix) - prefix)))) - (if bound-only? - (keep-matching-items completions - (lambda (name) - (environment-bound? environment name))) - completions))))) + (obarray-completions + (if (and bound-only? + (environment-lookup + environment + '*PARSER-CANONICALIZE-SYMBOLS?*)) + (string-downcase prefix) + prefix) + (if bound-only? + (lambda (symbol) + (environment-bound? environment symbol)) + (lambda (symbol) + symbol ;ignore + #t)))))) (cond ((not (pair? completions)) (if-not-found)) ((null? (cdr completions)) - (if-unique (system-pair-car (car completions)))) + (if-unique (symbol-name (car completions)))) (else - (let ((completions (map system-pair-car completions))) + (let ((completions (map symbol-name completions))) (if-not-unique (string-greatest-common-prefix completions) (lambda () (sort completions string<=?)))))))) (lambda (completion) (delete-string start end) (insert-string completion start)))))) - -(define (obarray-completions prefix) - (let ((obarray (fixed-objects-item 'OBARRAY))) - (let ((prefix-length (string-length prefix)) - (obarray-length (vector-length obarray))) - (let index-loop ((i 0)) - (if (fix:< i obarray-length) - (let bucket-loop ((symbols (vector-ref obarray i))) - (if (null? symbols) - (index-loop (fix:+ i 1)) - (let ((string (system-pair-car (car symbols)))) - (if (and (fix:<= prefix-length (string-length string)) - (let loop ((index 0)) - (or (fix:= index prefix-length) - (and (char=? (string-ref prefix index) - (string-ref string index)) - (loop (fix:+ index 1)))))) - (cons (car symbols) (bucket-loop (cdr symbols))) - (bucket-loop (cdr symbols)))))) - '()))))) + +(define (obarray-completions prefix filter) + (let ((completions '())) + (for-each-interned-symbol + (lambda (symbol) + (if (and (string-prefix? prefix (symbol-name symbol)) + (filter symbol)) + (set! completions (cons symbol completions))) + unspecific)) + completions)) (define-command scheme-complete-symbol "Perform completion on Scheme symbol preceding point. diff --git a/src/microcode/extern.h b/src/microcode/extern.h index e192cd18e..f636702ff 100644 --- a/src/microcode/extern.h +++ b/src/microcode/extern.h @@ -269,6 +269,8 @@ extern SCHEME_OBJECT string_to_symbol (SCHEME_OBJECT); extern SCHEME_OBJECT char_pointer_to_symbol (const char *); extern SCHEME_OBJECT memory_to_symbol (unsigned long, const void *); extern SCHEME_OBJECT find_symbol (unsigned long, const char *); +extern void strengthen_symbol (SCHEME_OBJECT); +extern void weaken_symbol (SCHEME_OBJECT); /* Random and OS utilities */ extern int strcmp_ci (const char *, const char *); diff --git a/src/microcode/intern.c b/src/microcode/intern.c index 9d44262d5..115ef32ff 100644 --- a/src/microcode/intern.c +++ b/src/microcode/intern.c @@ -32,7 +32,7 @@ USA. /* The FNV hash, short for Fowler/Noll/Vo in honor of its creators. */ static uint32_t -string_hash (uint32_t length, const char * string) +string_hash (long length, const char * string) { const unsigned char * scan = ((const unsigned char *) string); const unsigned char * end = (scan + length); @@ -58,19 +58,71 @@ find_symbol_internal (unsigned long length, const char * string) while (true) { SCHEME_OBJECT list = (*bucket); - if (PAIR_P (list)) + if ((WEAK_PAIR_P (list)) || (PAIR_P (list))) { SCHEME_OBJECT symbol = (PAIR_CAR (list)); - SCHEME_OBJECT name = (MEMORY_REF (symbol, SYMBOL_NAME)); - if (((STRING_LENGTH (name)) == length) - && ((memcmp ((STRING_POINTER (name)), string, length)) == 0)) - return (PAIR_CAR_LOC (list)); + if (INTERNED_SYMBOL_P (symbol)) + { + SCHEME_OBJECT name = (MEMORY_REF (symbol, SYMBOL_NAME)); + if (((STRING_LENGTH (name)) == length) + && ((memcmp ((STRING_POINTER (name)), string, length)) + == 0)) + return (PAIR_CAR_LOC (list)); + else + bucket = (PAIR_CDR_LOC (list)); + } + else + (*bucket) = (PAIR_CDR (list)); } else return (bucket); - bucket = (PAIR_CDR_LOC (list)); } } + +static void +replace_symbol_bucket_type (SCHEME_OBJECT symbol, unsigned int type) +{ + SCHEME_OBJECT obarray = (VECTOR_REF (fixed_objects, OBARRAY)); + SCHEME_OBJECT string = (MEMORY_REF (symbol, SYMBOL_NAME)); + long length = (STRING_LENGTH (string)); + const char *char_pointer = (STRING_POINTER (string)); + SCHEME_OBJECT *bucket + = (VECTOR_LOC (obarray, + ((string_hash (length, char_pointer)) + % (VECTOR_LENGTH (obarray))))); + while (true) + { + SCHEME_OBJECT list = (*bucket); + SCHEME_OBJECT element; + + assert ((WEAK_PAIR_P (list)) || (PAIR_P (list))); + element = (PAIR_CAR (list)); + + if (INTERNED_SYMBOL_P (element)) + { + if (element == symbol) + { + (*bucket) = (OBJECT_NEW_TYPE (type, list)); + return; + } + bucket = (PAIR_CDR_LOC (list)); + } + else + (*bucket) = (PAIR_CDR (list)); + } +} + +void +strengthen_symbol (SCHEME_OBJECT symbol) +{ + replace_symbol_bucket_type (symbol, TC_LIST); +} + +void +weaken_symbol (SCHEME_OBJECT symbol) +{ + replace_symbol_bucket_type (symbol, TC_WEAK_CONS); +} static SCHEME_OBJECT make_symbol (SCHEME_OBJECT string, SCHEME_OBJECT * cell) @@ -81,7 +133,7 @@ make_symbol (SCHEME_OBJECT string, SCHEME_OBJECT * cell) Free += 2; MEMORY_SET (symbol, SYMBOL_NAME, string); MEMORY_SET (symbol, SYMBOL_GLOBAL_VALUE, UNBOUND_OBJECT); - (*cell) = (cons (symbol, EMPTY_LIST)); + (*cell) = (system_pair_cons (TC_WEAK_CONS, symbol, EMPTY_LIST)); return (symbol); } } @@ -132,7 +184,7 @@ intern_symbol (SCHEME_OBJECT symbol) else { SCHEME_OBJECT result = (OBJECT_NEW_TYPE (TC_INTERNED_SYMBOL, symbol)); - (*cell) = (cons (result, EMPTY_LIST)); + (*cell) = (system_pair_cons (TC_WEAK_CONS, result, EMPTY_LIST)); return (result); } } @@ -147,7 +199,7 @@ arg_symbol (int n) const char * arg_interned_symbol (int n) { - CHECK_ARG (n, SYMBOL_P); + CHECK_ARG (n, INTERNED_SYMBOL_P); return (STRING_POINTER (MEMORY_REF ((ARG_REF (n)), SYMBOL_NAME))); } diff --git a/src/microcode/lookup.c b/src/microcode/lookup.c index 1d238b39f..3a7e54a7b 100644 --- a/src/microcode/lookup.c +++ b/src/microcode/lookup.c @@ -442,7 +442,11 @@ define_variable (SCHEME_OBJECT environment, SCHEME_OBJECT symbol, SCHEME_OBJECT * cell = (scan_frame (environment, symbol, 1)); SCHEME_OBJECT old_value; if (cell != 0) - return (assign_variable_end (cell, value, (&old_value), 1)); + { + if (GLOBAL_FRAME_P (environment)) + strengthen_symbol (symbol); + return (assign_variable_end (cell, value, (&old_value), 1)); + } } /* At this point, we know that environment can't be the global @@ -563,6 +567,9 @@ link_variables (SCHEME_OBJECT target_environment, SCHEME_OBJECT target_symbol, if (target_cell == source_cell) return (PRIM_DONE); + if ((target_cell != 0) && (GLOBAL_FRAME_P (target_environment))) + strengthen_symbol (target_symbol); + if ((target_cell != 0) && ((get_trap_kind (*target_cell)) == TRAP_COMPILER_CACHED)) { @@ -633,6 +640,8 @@ unbind_variable (SCHEME_OBJECT environment, SCHEME_OBJECT symbol, { SCHEME_OBJECT frame; SCHEME_OBJECT * cell = (find_binding_cell (environment, symbol, (&frame))); + if (GLOBAL_FRAME_P (frame)) + weaken_symbol (symbol); switch ((cell == 0) ? TRAP_UNBOUND : (get_trap_kind (*cell))) { case TRAP_UNBOUND: @@ -885,7 +894,8 @@ add_cache_reference (SCHEME_OBJECT environment, SCHEME_OBJECT symbol, SCHEME_OBJECT block, unsigned long offset, unsigned int reference_kind) { - SCHEME_OBJECT * cell = (find_binding_cell (environment, symbol, 0)); + SCHEME_OBJECT frame = 0; + SCHEME_OBJECT * cell = (find_binding_cell (environment, symbol, (&frame))); SCHEME_OBJECT dummy_cell = UNBOUND_OBJECT; if (cell == 0) /* There's no binding for the variable, and we don't have access @@ -893,6 +903,8 @@ add_cache_reference (SCHEME_OBJECT environment, SCHEME_OBJECT symbol, we'll install one, but it won't be attached to any environment structure. */ cell = (&dummy_cell); + else if (GLOBAL_FRAME_P (frame)) + strengthen_symbol (symbol); /* This procedure must complete to keep the data structures consistent, so we do a GC check in advance to guarantee that all of the allocations will finish. */ diff --git a/src/runtime/global.scm b/src/runtime/global.scm index 674db84c6..32f4506fb 100644 --- a/src/runtime/global.scm +++ b/src/runtime/global.scm @@ -88,6 +88,7 @@ USA. ((#x00010200 #x0001020000030400) #t) ((#x00020100 #x0004030000020100) #f) (else (error "Unable to determine endianness of host.")))) + (add-secondary-gc-daemon! clean-obarray) unspecific) ;;;; Potpourri @@ -316,23 +317,69 @@ USA. (define unspecific (object-new-type (ucode-type constant) 1)) -(define (obarray->list #!optional obarray) - (let ((obarray - (if (default-object? obarray) - (fixed-objects-item 'OBARRAY) - obarray))) - (let per-bucket - ((index (fix:- (vector-length obarray) 1)) - (accumulator '())) - (if (fix:< index 0) - accumulator - (let per-symbol - ((bucket (vector-ref obarray index)) - (accumulator accumulator)) - (if (pair? bucket) - (per-symbol (cdr bucket) (cons (car bucket) accumulator)) - (per-bucket (fix:- index 1) accumulator))))))) +(define (for-each-interned-symbol procedure) + (for-each-symbol-in-obarray (fixed-objects-item 'OBARRAY) procedure)) + +(define (for-each-symbol-in-obarray obarray procedure) + (let per-bucket ((index (vector-length obarray))) + (if (fix:> index 0) + (let ((index (fix:- index 1))) + (let per-symbol ((bucket (vector-ref obarray index))) + (cond ((weak-pair? bucket) + (let ((symbol (weak-car bucket))) + (if (weak-pair/car? bucket) + (procedure symbol))) + (per-symbol (weak-cdr bucket))) + ((pair? bucket) + (procedure (car bucket)) + (per-symbol (cdr bucket))) + (else + (per-bucket index)))))))) +(define (obarray->list #!optional obarray) + (let ((list '())) + (define (accumulate symbol) + (set! list (cons symbol list)) + unspecific) + (if (default-object? obarray) + (for-each-interned-symbol accumulate) + (for-each-symbol-in-obarray obarray accumulate)) + list)) + +(define (clean-obarray) + (without-interrupts + (lambda () + (let ((obarray (fixed-objects-item 'OBARRAY))) + (let loop ((index (vector-length obarray))) + (if (fix:> index 0) + (let ((index (fix:- index 1))) + (define (find-broken-entry bucket previous) + (cond ((weak-pair? bucket) + (let ((d (weak-cdr bucket))) + (if (weak-pair/car? bucket) + (find-broken-entry d bucket) + (delete-broken-entries d previous)))) + ((pair? bucket) + (find-broken-entry (cdr bucket) bucket)))) + (define (delete-broken-entries bucket previous) + (cond ((weak-pair? bucket) + (let ((d (weak-cdr bucket))) + (if (weak-pair/car? bucket) + (begin (clobber previous bucket) + (find-broken-entry d bucket)) + (delete-broken-entries d previous)))) + ((pair? bucket) + (clobber previous bucket) + (find-broken-entry (cdr bucket) bucket)) + (else + (clobber previous '())))) + (define (clobber previous tail) + (cond ((weak-pair? previous) (weak-set-cdr! previous tail)) + ((pair? previous) (set-cdr! previous tail)) + (else (vector-set! obarray index tail)))) + (find-broken-entry (vector-ref obarray index) #f) + (loop index)))))))) + (define (impurify object) object) diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index 085e4161c..8d35c7ddb 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -301,6 +301,7 @@ USA. exit false-procedure fasdump + for-each-interned-symbol get-fixed-objects-vector get-interrupt-enables guarantee-hook-list diff --git a/src/runtime/uenvir.scm b/src/runtime/uenvir.scm index 22a834d3b..9222d141f 100644 --- a/src/runtime/uenvir.scm +++ b/src/runtime/uenvir.scm @@ -240,29 +240,18 @@ USA. value) (define (walk-global keep? map-entry) - (let ((obarray (fixed-objects-item 'OBARRAY))) - (let ((n-buckets (vector-length obarray))) - (let per-bucket ((index 0) (result '())) - (if (fix:< index n-buckets) - (let per-symbol - ((bucket (vector-ref obarray index)) - (result result)) - (if (pair? bucket) - (per-symbol (cdr bucket) - (let ((name (car bucket))) - (if (special-unbound-name? name) - result - (let ((value - (map-reference-trap-value - (lambda () - (system-pair-cdr name))))) - (if (or (unbound-reference-trap? value) - (not (keep? value))) - result - (cons (map-entry name value) - result)))))) - (per-bucket (fix:+ index 1) result))) - result))))) + (let ((result '())) + (for-each-interned-symbol + (lambda (name) + (if (not (special-unbound-name? name)) + (let ((value + (map-reference-trap-value + (lambda () + (system-pair-cdr name))))) + (if (and (not (unbound-reference-trap? value)) + (keep? value)) + (set! result (cons (map-entry value) result))))))) + result)) (define (special-unbound-name? name) (eq? name package-name-tag)) -- 2.25.1