From: Chris Hanson Date: Wed, 23 May 2018 07:17:55 +0000 (-0700) Subject: Rewrite the hashing primitives to support runtime-level hashing. X-Git-Tag: mit-scheme-pucked-x11-0.3.1~7^2~10 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=528fdcb53f576f000f4c7e6a71e01c0d7fb23fe1;p=mit-scheme.git Rewrite the hashing primitives to support runtime-level hashing. --- diff --git a/src/microcode/extern.h b/src/microcode/extern.h index bead4542e..ff52d511a 100644 --- a/src/microcode/extern.h +++ b/src/microcode/extern.h @@ -314,8 +314,22 @@ extern void weaken_symbol (SCHEME_OBJECT); extern unsigned long compute_extra_ephemeron_space (unsigned long); extern void guarantee_extra_ephemeron_space (unsigned long); -/* Random and OS utilities */ +/* Hashing */ + extern uint32_t memory_hash (unsigned long, const void *); +extern bool hashable_object_p (SCHEME_OBJECT); +extern uint32_t hash_object (SCHEME_OBJECT); +extern uint32_t combine_hashes (uint32_t, uint32_t); + +#if (FIXNUM_LENGTH >= 32) +# define HASH_TO_FIXNUM(hash) (ULONG_TO_FIXNUM (hash)) +#else + /* Shorten the result using xor-folding. */ +# define HASH_TO_FIXNUM(hash) \ + (ULONG_TO_FIXNUM (((hash) >> FIXNUM_LENGTH) ^ ((hash) & FIXNUM_MASK))); +#endif + +/* Random and OS utilities */ extern int strcmp_ci (const char *, const char *); extern bool interpreter_applicable_p (SCHEME_OBJECT); extern void add_reload_cleanup (void (*) (void)); diff --git a/src/microcode/intern.c b/src/microcode/intern.c index 9109441f8..ddbe34135 100644 --- a/src/microcode/intern.c +++ b/src/microcode/intern.c @@ -227,8 +227,8 @@ interning symbols.") { SCHEME_OBJECT string = (ARG_REF (1)); PRIMITIVE_RETURN - (ULONG_TO_FIXNUM (memory_hash ((STRING_LENGTH (string)), - (STRING_POINTER (string))))); + (HASH_TO_FIXNUM (memory_hash ((STRING_LENGTH (string)), + (STRING_POINTER (string))))); } } @@ -242,8 +242,8 @@ Equivalent to (MODULO (STRING-HASH STRING) DENOMINATOR).") { SCHEME_OBJECT string = (ARG_REF (1)); PRIMITIVE_RETURN - (ULONG_TO_FIXNUM ((memory_hash ((STRING_LENGTH (string)), - (STRING_POINTER (string)))) - % (arg_ulong_integer (2)))); + (HASH_TO_FIXNUM ((memory_hash ((STRING_LENGTH (string)), + (STRING_POINTER (string)))) + % (arg_ulong_integer (2)))); } } diff --git a/src/microcode/prim.c b/src/microcode/prim.c index 6d7d4f0b9..00c54a8a5 100644 --- a/src/microcode/prim.c +++ b/src/microcode/prim.c @@ -28,7 +28,6 @@ USA. #include "scheme.h" #include "prims.h" -#include "bignmint.h" static unsigned long arg_type (int arg) @@ -414,49 +413,29 @@ DEFINE_PRIMITIVE ("SET-CELL-CONTENTS!", Prim_set_cell_contents, 2, 2, 0) PRIMITIVE_RETURN (UNSPECIFIC); } -DEFINE_PRIMITIVE ("hash-simple-object", Prim_hash_simple_object, 1, 1, 0) +DEFINE_PRIMITIVE ("primitive-object-hash", Prim_primitive_object_hash, 1, 1, 0) { PRIMITIVE_HEADER (1); SCHEME_OBJECT object = (ARG_REF (1)); - unsigned long n_bytes; - const void * bytes; - if (GC_TYPE_NON_POINTER (object)) - { - n_bytes = (sizeof (SCHEME_OBJECT)); - bytes = (&object); - } - else if ((LEGACY_STRING_P (object)) || (BYTEVECTOR_P (object))) - { - n_bytes = (BYTEVECTOR_LENGTH (object)); - bytes = (BYTEVECTOR_POINTER (object)); - } - else if (UNICODE_STRING_P (object)) - { - n_bytes = (UNICODE_STRING_BYTE_LENGTH (object)); - bytes = (UNICODE_STRING_POINTER (object)); - } - else if (SYMBOL_P (object)) - { - SCHEME_OBJECT name = (MEMORY_REF (object, SYMBOL_NAME)); - n_bytes = (BYTEVECTOR_LENGTH (name)); - bytes = (BYTEVECTOR_POINTER (name)); - } - else if (BIGNUM_P (object)) - { - n_bytes = ((BIGNUM_LENGTH (object)) * (sizeof (bignum_digit_type))); - bytes = (BIGNUM_START_PTR (object)); - } - else if (FLONUM_P (object)) - { - n_bytes = ((FLOATING_VECTOR_LENGTH (object)) * (sizeof (double))); - bytes = (FLOATING_VECTOR_LOC (object, 0)); - } - else - PRIMITIVE_RETURN (SHARP_F); - PRIMITIVE_RETURN (ULONG_TO_FIXNUM (memory_hash (n_bytes, bytes))); -} - -DEFINE_PRIMITIVE ("primitive-memory-hash", Prim_memory_hash, 3, 3, 0) + PRIMITIVE_RETURN + ((hashable_object_p (object)) + ? (HASH_TO_FIXNUM (hash_object (object))) + : SHARP_F); +} + +DEFINE_PRIMITIVE ("primitive-object-hash-2", Prim_primitive_object_hash_2, 2, 2, 0) +{ + PRIMITIVE_HEADER (2); + SCHEME_OBJECT object1 = (ARG_REF (1)); + SCHEME_OBJECT object2 = (ARG_REF (2)); + PRIMITIVE_RETURN + (((hashable_object_p (object1)) && (hashable_object_p (object2))) + ? (HASH_TO_FIXNUM + (combine_hashes ((hash_object (object1)), (hash_object (object2))))) + : SHARP_F); +} + +DEFINE_PRIMITIVE ("primitive-memory-hash", Prim_primitive_memory_hash, 3, 3, 0) { PRIMITIVE_HEADER (3); SCHEME_OBJECT object = (ARG_REF (1)); @@ -477,7 +456,7 @@ DEFINE_PRIMITIVE ("primitive-memory-hash", Prim_memory_hash, 3, 3, 0) error_bad_range_arg (3); PRIMITIVE_RETURN - (ULONG_TO_FIXNUM + (HASH_TO_FIXNUM (memory_hash ((end - start), (((const uint8_t *) (OBJECT_ADDRESS (object))) + start)))); } diff --git a/src/microcode/utils.c b/src/microcode/utils.c index 7018d4374..c3e8415cd 100644 --- a/src/microcode/utils.c +++ b/src/microcode/utils.c @@ -30,6 +30,7 @@ USA. #include "prims.h" #include "history.h" #include "syscall.h" +#include "bignmint.h" SCHEME_OBJECT * history_register; unsigned long prev_restore_history_offset; @@ -488,8 +489,9 @@ arg_real_in_range (int arg_number, double lower_limit, double upper_limit) error_bad_range_arg (arg_number); return (result); } - -/* The FNV hash, short for Fowler/Noll/Vo in honor of its creators. */ + +/* The 32-bit FNV-1a hash, short for Fowler/Noll/Vo in honor of its + creators. */ uint32_t memory_hash (unsigned long length, const void * vp) @@ -498,13 +500,103 @@ memory_hash (unsigned long length, const void * vp) const uint8_t * end = (scan + length); uint32_t result = 2166136261U; while (scan < end) - result = ((result * 16777619U) ^ ((uint32_t) (*scan++))); -#if (FIXNUM_LENGTH >= 32) + { + result ^= ((uint32_t) (*scan++)); + result *= 16777619U; + } return (result); -#else - /* Shorten the result using xor-folding. */ - return ((result >> FIXNUM_LENGTH) ^ (result & FIXNUM_MASK)); -#endif +} + +bool +hashable_object_p (SCHEME_OBJECT object) +{ + if (GC_TYPE_NON_POINTER (object)) + return (true); + + switch (OBJECT_TYPE (object)) + { + case TC_BYTEVECTOR: + case TC_CHARACTER_STRING: + case TC_INTERNED_SYMBOL: + case TC_UNINTERNED_SYMBOL: + case TC_BIG_FIXNUM: + case TC_BIG_FLONUM: + case TC_RATNUM: + case TC_COMPLEX: + case TC_LIST: + case TC_WEAK_CONS: + case TC_VECTOR: + case TC_CELL: + return (true); + default: + return (false); + } +} + +uint32_t +hash_object (SCHEME_OBJECT object) +{ + if (GC_TYPE_NON_POINTER (object)) + return (memory_hash ((sizeof (SCHEME_OBJECT)), + (&object))); + + switch (OBJECT_TYPE (object)) + { + case TC_BYTEVECTOR: + case TC_CHARACTER_STRING: + return (memory_hash ((BYTEVECTOR_LENGTH (object)), + (BYTEVECTOR_POINTER (object)))); + + case TC_INTERNED_SYMBOL: + case TC_UNINTERNED_SYMBOL: + { + SCHEME_OBJECT name = (MEMORY_REF (object, SYMBOL_NAME)); + return (memory_hash ((BYTEVECTOR_LENGTH (name)), + (BYTEVECTOR_POINTER (name)))); + } + + case TC_BIG_FIXNUM: + return (memory_hash (((BIGNUM_LENGTH (object)) + * (sizeof (bignum_digit_type))), + (BIGNUM_START_PTR (object)))); + + case TC_BIG_FLONUM: + return (memory_hash (((FLOATING_VECTOR_LENGTH (object)) + * (sizeof (double))), + (FLOATING_VECTOR_LOC (object, 0)))); + + case TC_RATNUM: + case TC_COMPLEX: + return (combine_hashes ((hash_object (MEMORY_REF (object, 0))), + (hash_object (MEMORY_REF (object, 1))))); + + case TC_LIST: + case TC_WEAK_CONS: + return (combine_hashes ((hash_object (PAIR_CAR (object))), + (hash_object (PAIR_CDR (object))))); + + case TC_VECTOR: + { + const SCHEME_OBJECT * scan = (VECTOR_LOC (object, 0)); + const SCHEME_OBJECT * end = (scan + (VECTOR_LENGTH (object))); + uint32_t result = 0; + while (scan < end) + result = (combine_hashes (result, (hash_object (*scan++)))); + return result; + } + + case TC_CELL: + return (hash_object (MEMORY_REF (object, 0))); + + default: + return (0); + } +} + +uint32_t +combine_hashes (uint32_t hash1, uint32_t hash2) +{ + return ((hash1 * 31) + hash2); } bool