From 812ff1028a92b8b69f02609b09c3d5364f757c46 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Fri, 16 Jun 1989 09:44:40 +0000 Subject: [PATCH] Significant rewrite -- changed algorithm used for string hashing, altered interfaces of utility procedures. Eliminated character-list primitives, which are no longer used. Added new primitive, `find-symbol', which is like `symbol->string' except that it returns #F if the symbol was not already in the obarray. --- v7/src/microcode/intern.c | 332 ++++++++++++++------------------------ 1 file changed, 122 insertions(+), 210 deletions(-) diff --git a/v7/src/microcode/intern.c b/v7/src/microcode/intern.c index 72f10f8f7..26f811cb5 100644 --- a/v7/src/microcode/intern.c +++ b/v7/src/microcode/intern.c @@ -1,6 +1,6 @@ /* -*-C-*- -Copyright (c) 1987, 1988 Massachusetts Institute of Technology +Copyright (c) 1987, 1988, 1989 Massachusetts Institute of Technology This material was developed by the Scheme project at the Massachusetts Institute of Technology, Department of Electrical Engineering and @@ -30,275 +30,187 @@ Technology nor of any adaptation thereof in any advertising, promotional, or sales literature without prior written consent from MIT in each case. */ -/* $Header: /Users/cph/tmp/foo/mit-scheme/mit-scheme/v7/src/microcode/intern.c,v 9.45 1988/08/15 20:49:57 cph Exp $ */ +/* $Header: /Users/cph/tmp/foo/mit-scheme/mit-scheme/v7/src/microcode/intern.c,v 9.46 1989/06/16 09:44:40 cph Exp $ */ -/* Utilities for manipulating symbols. */ +/* String hash functions and interning of symbols. */ #include "scheme.h" #include "prims.h" #include "trap.h" #include "string.h" -/* Hashing strings and character lists. */ +/* Hashing strings */ -long -Do_Hash (String_Ptr, String_Length) - char *String_Ptr; - long String_Length; -{ - fast long i, Value, End_Count; - - Value = (LENGTH_MULTIPLIER * String_Length); - End_Count = ((String_Length > MAX_HASH_CHARS) ? - MAX_HASH_CHARS : - String_Length); - for (i = 0; i < End_Count; i++) - Value = ((Value << SHIFT_AMOUNT) + (MAX_CHAR & String_Ptr[i])); - return (Value); -} - -long -scheme_string_hash (string) +static long +string_hash (string) Pointer string; { - return - (Do_Hash ((Scheme_String_To_C_String (string)), - (string_length (string)))); + fast long result; + fast unsigned char * scan; + fast unsigned char * end; + + result = (string_length (string)); + scan = ((unsigned char *) (string_pointer (string, 0))); + end = (scan + result); + while (1) + { + if (scan >= end) break; + result ^= (*scan++); + if (scan >= end) break; + result ^= ((*scan++) << CHAR_SIZE); + } + return (result); } -Pointer -Hash (string) - Pointer string; +static Boolean +string_equal (string1, string2) + Pointer string1, string2; { - return (MAKE_SIGNED_FIXNUM (scheme_string_hash (string))); + fast char * scan1; + fast char * scan2; + fast long length; + fast char * end1; + + scan1 = (string_pointer (string1, 0)); + scan2 = (string_pointer (string2, 0)); + if (scan1 == scan2) + return (true); + length = (string_length (string1)); + if (length != (string_length (string2))) + return (false); + end1 = (scan1 + length); + + while (scan1 < end1) + if ((*scan1++) != (*scan2++)) + return (false); + return (true); } - -Boolean -string_equal (String1, String2) - Pointer String1, String2; + +static Pointer * +find_symbol_internal (string) + Pointer string; { - fast char *S1, *S2; - fast long i, Length1, Length2; - - if (Address(String1) == Address(String2)) - return true; - Length1 = ((long) (Fast_Vector_Ref(String1, STRING_LENGTH))); - Length2 = ((long) (Fast_Vector_Ref(String2, STRING_LENGTH))); - if (Length1 != Length2) + fast Pointer * bucket; { - return false; + fast Pointer obarray = (Get_Fixed_Obj_Slot (OBArray)); + bucket = + (Nth_Vector_Loc (obarray, + (((string_hash (string)) % (Vector_Length (obarray))) + + 1))); } - - S1 = ((char *) Nth_Vector_Loc(String1, STRING_CHARS)); - S2 = ((char *) Nth_Vector_Loc(String2, STRING_CHARS)); - for (i = 0; i < Length1; i++) - { - if (*S1++ != *S2++) + while ((*bucket) != EMPTY_LIST) { - return (false); + fast Pointer symbol = (Vector_Ref ((*bucket), CONS_CAR)); + if (string_equal (string, (Fast_Vector_Ref (symbol, SYMBOL_NAME)))) + return (Nth_Vector_Loc ((*bucket), CONS_CAR)); + bucket = (Nth_Vector_Loc ((*bucket), CONS_CDR)); } - } - return (true); + return (bucket); } - -/* Interning involves hashing the input string and either returning - an existing symbol with that name from the ObArray or creating a - new symbol and installing it in the ObArray. The resulting interned - symbol is stored in *Un_Interned. */ /* Set this to be informed of symbols as they are interned. */ -void (*intern_symbol_hook) (); +void (*intern_symbol_hook) () = ((void (*) ()) 0); -extern void Intern(); - -void -Intern (Un_Interned) - Pointer *Un_Interned; +static Pointer +link_new_symbol (symbol, cell) + Pointer symbol; + Pointer * cell; { - fast Pointer string, *bucket, symbol; - Pointer Ob_Array; - - string = (Fast_Vector_Ref (*Un_Interned, SYMBOL_NAME)); - Ob_Array = (Get_Fixed_Obj_Slot (OBArray)); - bucket = - (Nth_Vector_Loc (Ob_Array, - (((scheme_string_hash (string)) % - (Vector_Length (Ob_Array))) - + 1))); - - while (*bucket != NIL) - { - symbol = (Vector_Ref (*bucket, CONS_CAR)); - if (string_equal(string, (Fast_Vector_Ref (symbol, SYMBOL_NAME)))) - { - *Un_Interned = symbol; - return; - } - bucket = (Nth_Vector_Loc (*bucket, CONS_CDR)); - } - - /* Symbol does not exist yet in obarray. bucket points to the + /* `symbol' does not exist yet in obarray. `cell' points to the cell containing the final '() in the list. Replace this - with the CONS of the new symbol and '() (i.e. extend the + with a cons of the new symbol and '() (i.e. extend the list in the bucket by 1 new element). */ - Store_Type_Code (*Un_Interned, TC_INTERNED_SYMBOL); - *bucket = (Make_Pointer (TC_LIST, Free)); - Free[CONS_CAR] = *Un_Interned; - Free[CONS_CDR] = NIL; + fast Pointer result = + (Make_Object (TC_INTERNED_SYMBOL, (OBJECT_DATUM (symbol)))); + Primitive_GC_If_Needed (2); + (*cell) = (Make_Pointer (TC_LIST, Free)); + (Free [CONS_CAR]) = result; + (Free [CONS_CDR]) = EMPTY_LIST; Free += 2; - if (intern_symbol_hook) - (*intern_symbol_hook) (*Un_Interned); - return; + if (intern_symbol_hook != ((void (*) ()) 0)) + (*intern_symbol_hook) (result); + return (result); } -Pointer -string_to_symbol (String) - Pointer String; +Pointer +find_symbol (string) + Pointer string; { - Pointer New_Symbol, Interned_Symbol, *Orig_Free; - - Orig_Free = Free; - New_Symbol = Make_Pointer(TC_UNINTERNED_SYMBOL, Free); - Free[SYMBOL_NAME] = String; - Free[SYMBOL_GLOBAL_VALUE] = UNBOUND_OBJECT; - Free += 2; - Interned_Symbol = New_Symbol; - - /* The work is done by Intern which returns in Interned_Symbol - either the same symbol we gave it (in which case we need to check - for GC) or an existing symbol (in which case we have to release - the heap space acquired to hold New_Symbol). - */ + fast Pointer result = (* (find_symbol_internal (string))); + return ((result == EMPTY_LIST) ? SHARP_F : result); +} - Intern(&Interned_Symbol); - if (Address(Interned_Symbol) == Address(New_Symbol)) +Pointer +string_to_symbol (string) + Pointer string; +{ + fast Pointer * cell = (find_symbol_internal (string)); + if ((*cell) != EMPTY_LIST) + return (*cell); + Primitive_GC_If_Needed (2); { - Primitive_GC_If_Needed(0); + fast Pointer symbol = (Make_Pointer (TC_UNINTERNED_SYMBOL, Free)); + (Free [SYMBOL_NAME]) = string; + (Free [SYMBOL_GLOBAL_VALUE]) = UNBOUND_OBJECT; + Free += 2; + return (link_new_symbol (symbol, cell)); } - else - Free = Orig_Free; - return Interned_Symbol; } - -/* For debugging, given a String, return either a "not interned" - * message or the address of the symbol and its global value. - */ -void -Find_Symbol (scheme_string) - Pointer scheme_string; +Pointer +intern_symbol (symbol) + Pointer symbol; { - Pointer the_obarray, symbol, *bucket; - long hash_of_string; - - hash_of_string = scheme_string_hash(scheme_string); - the_obarray = Get_Fixed_Obj_Slot(OBArray); - hash_of_string %= Vector_Length(the_obarray); - bucket = Nth_Vector_Loc(the_obarray, hash_of_string); - while (*bucket != NIL) - { - if (string_equal(scheme_string, - Vector_Ref(Vector_Ref(*bucket, CONS_CAR), - SYMBOL_NAME))) - { - symbol = Vector_Ref(*bucket, CONS_CAR); - printf("\nInterned Symbol: 0x%x", symbol); - Print_Expression(Vector_Ref(symbol, SYMBOL_GLOBAL_VALUE), - "Value"); - printf("\n"); - return; - } - bucket = Nth_Vector_Loc(*bucket, CONS_CDR); - } - printf("\nNot interned.\n"); - return; + fast Pointer * cell = + (find_symbol_internal (Fast_Vector_Ref (symbol, SYMBOL_NAME))); + return + (((*cell) != EMPTY_LIST) + ? (*cell) + : (link_new_symbol (symbol, cell))); } -/* (STRING->SYMBOL STRING) - Similar to INTERN-CHARACTER-LIST, except this one takes a string - instead of a list of ascii values as argument. */ - -DEFINE_PRIMITIVE ("STRING->SYMBOL", Prim_string_to_symbol, 1, 1, 0) +DEFINE_PRIMITIVE ("FIND-SYMBOL", Prim_find_symbol, 1, 1, + "(FIND-SYMBOL STRING) +Returns the symbol whose name is STRING, or #F if no such symbol exists.") { PRIMITIVE_HEADER (1); CHECK_ARG (1, STRING_P); - PRIMITIVE_RETURN (string_to_symbol (ARG_REF (1))); + PRIMITIVE_RETURN (find_symbol (ARG_REF (1))); } -/* (INTERN-CHARACTER-LIST LIST) - LIST should consist of the ASCII codes for characters. Returns - a new (interned) symbol made out of these characters. Notice - that this is a fairly low-level primitive, and no checking is - done on the characters except that they are in the range 0 to - 255. Thus non-printing, lower-case, and special characters can - be put into symbols this way. */ - -DEFINE_PRIMITIVE ("INTERN-CHARACTER-LIST", Prim_intern_character_list, 1, 1, 0) +DEFINE_PRIMITIVE ("STRING->SYMBOL", Prim_string_to_symbol, 1, 1, + "(STRING->SYMBOL STRING) +Returns the symbol whose name is STRING, constructing a new symbol if needed.") { - extern Pointer list_to_string(); PRIMITIVE_HEADER (1); - PRIMITIVE_RETURN (string_to_symbol (list_to_string (ARG_REF (1)))); + CHECK_ARG (1, STRING_P); + PRIMITIVE_RETURN (string_to_symbol (ARG_REF (1))); } -/* (STRING-HASH STRING) - Return a hash value for a string. This uses the hashing - algorithm used for interning symbols. It is intended for use by - the reader in creating interned symbols. */ - -DEFINE_PRIMITIVE ("STRING-HASH", Prim_string_hash, 1, 1, 0) +DEFINE_PRIMITIVE ("STRING-HASH", Prim_string_hash, 1, 1, + "(STRING-HASH STRING) +Return a hash value for a string. This uses the hashing +algorithm used for interning symbols. It is intended for use by +the reader in creating interned symbols.") { PRIMITIVE_HEADER (1); CHECK_ARG (1, STRING_P); - PRIMITIVE_RETURN (Hash (ARG_REF (1))); + PRIMITIVE_RETURN (MAKE_UNSIGNED_FIXNUM (string_hash (ARG_REF (1)))); } -DEFINE_PRIMITIVE ("STRING-HASH-MOD", Prim_string_hash_mod, 2, 2, 0) +DEFINE_PRIMITIVE ("STRING-HASH-MOD", Prim_string_hash_mod, 2, 2, + "(STRING-HASH-MOD STRING DENOMINATOR) +DENOMINATOR must be a nonnegative integer. +Equivalent to (MOD (STRING-HASH STRING) DENOMINATOR).") { PRIMITIVE_HEADER (2); CHECK_ARG (1, STRING_P); PRIMITIVE_RETURN (MAKE_UNSIGNED_FIXNUM - ((scheme_string_hash (ARG_REF (1))) % - (arg_nonnegative_integer (2)))); -} - -/* (CHARACTER-LIST-HASH LIST) - Takes a list of ASCII codes for characters and returns a hash - code for them. This uses the hashing function used to intern - symbols in Fasload, and is really intended only for that - purpose. */ - -DEFINE_PRIMITIVE ("CHARACTER-LIST-HASH", Prim_character_list_hash, 1, 1, 0) -{ - fast Pointer char_list; - long Length; - Pointer This_Char; - char String[MAX_HASH_CHARS]; - PRIMITIVE_HEADER (1); - - char_list = (ARG_REF (1)); - Touch_In_Primitive (char_list, char_list); - for (Length = 0; (PAIR_P (char_list)); Length++) - { - if (Length < MAX_HASH_CHARS) - { - Touch_In_Primitive - ((Vector_Ref (char_list, CONS_CAR)), This_Char); - if (! (CHARACTER_P (This_Char))) - error_wrong_type_arg (1); - Range_Check((String [Length]), This_Char, - '\0', ((char) MAX_CHAR), - ERR_ARG_1_WRONG_TYPE); - Touch_In_Primitive - ((Vector_Ref (char_list, CONS_CDR)), char_list); - } - } - if (char_list != NIL) - error_wrong_type_arg (1); - PRIMITIVE_RETURN (MAKE_SIGNED_FIXNUM (Do_Hash (String, Length))); + ((string_hash (ARG_REF (1))) % (arg_nonnegative_integer (2)))); } -- 2.25.1