Add i386 assembly to allocate and access thread-local variables.
author Matt Birkholz <puck@birchwood-abbey.net>
Tue, 21 Jul 2015 07:27:11 +0000 (00:27 -0700)
committer Matt Birkholz <puck@birchwood-abbey.net>
Thu, 26 Nov 2015 08:09:46 +0000 (01:09 -0700)
src/microcode/cmpauxmd/i386.m4
src/microcode/cmpintmd/i386.c
src/microcode/cmpintmd/i386.h

index 2eaf3672ac3bfd76de626bdd1d91dbf0061debcd..06cf9b9b934490458e05dcbee54a40dfc5dc4b13 100644 (file)
@@ -41,7 +41,7 @@
 ###    2) The C compiler divides registers into three groups:
 ###    - Linkage registers, used for procedure calls and global
 ###    references.  On i386 (gcc and Zortech C): %ebp, %esp.
-###    - super temporaries, not preserved accross procedure calls and
+###    - super temporaries, not preserved across procedure calls and
 ###    always usable. On i386 (gcc and Zortech C): %eax, %edx, %ecx.
 ###    - preserved registers saved by the callee if they are written.
 ###    On i386 (gcc and Zortech C): all others (%ebx, %esi, %edi).
@@ -176,6 +176,9 @@ IFNDASM(`define(popfd,`popf')')
 ifdef(`SUPPRESS_LEADING_UNDERSCORE',
        `define(EVR,`$1')',
        `define(EVR,`_$1')')
+ifdef(`ENABLE_SMP',
+      `define(TLVR,`%gs:$1\@ntpoff')',
+      `define(TLVR,`EVR($1)')')
 
 # When using the Watcom C compiler with register-based calling
 # conventions, source-code function names normally expand to `FOO_',
@@ -350,6 +353,38 @@ define(rmask,REG(ebp))
 IFDASM(`.586p
 .model flat')
 
+use_external_data(EVR(utility_table))
+
+ifdef(`ENABLE_SMP',`
+       .section .tbss,"awT",\@nobits
+       .align 4
+define(`define_tllong',`
+       .globl $1
+       .type $1 STT_TLS
+       .size $1, 4
+$1:
+       .zero   4
+')
+       define_tllong(Free)
+       define_tllong(heap_alloc_limit)
+       define_tllong(heap_end)
+       define_tllong(stack_guard)
+       define_tllong(stack_pointer)
+       define_tllong(stack_start)
+       define_tllong(C_Stack_Pointer)
+       define_tllong(C_Frame_Pointer)
+
+       .zero 128
+       .globl Registers
+       .type Registers STT_TLS
+       .size Registers, eval(REGBLOCK_SIZE_IN_OBJECTS*4)
+Registers:
+       .zero eval(REGBLOCK_SIZE_IN_OBJECTS*4)
+',`
+ifdef(`WIN32',`
+use_external_data(EVR(RegistersPtr))
+',`
+
 DECLARE_DATA_SEGMENT()
 declare_alignment(2)
 
@@ -359,29 +394,25 @@ use_external_data(EVR(heap_end))
 use_external_data(EVR(stack_guard))
 use_external_data(EVR(stack_pointer))
 use_external_data(EVR(stack_start))
-use_external_data(EVR(utility_table))
 
-ifdef(`WIN32',`
-use_external_data(EVR(RegistersPtr))
-',`
 define_data(Regstart)
 allocate_space(Regstart,128)
 
 define_data(Registers)
 allocate_space(Registers,eval(REGBLOCK_SIZE_IN_OBJECTS*4))
-')
-
-define_data(i387_presence)
-allocate_longword(i387_presence)
-
-define_data(sse_presence)
-allocate_longword(sse_presence)
 
 define_data(C_Stack_Pointer)
 allocate_longword(C_Stack_Pointer)
 
 define_data(C_Frame_Pointer)
 allocate_longword(C_Frame_Pointer)
+')')
+
+define_data(i387_presence)
+allocate_longword(i387_presence)
+
+define_data(sse_presence)
+allocate_longword(sse_presence)
 
 define_data(ia32_cpuid_supported)
 allocate_longword(ia32_cpuid_supported)
@@ -559,7 +590,7 @@ no_cpuid_instr:
 # set esp to something funny.
 
 define_c_label(within_c_stack)
-       OP(mov,l)       TW(EVR(C_Stack_Pointer),REG(eax))
+       OP(mov,l)       TW(TLVR(C_Stack_Pointer),REG(eax))
        # Are we currently in C, signalled by having no saved C stack pointer?
        OP(cmp,l)       TW(IMM(0),REG(eax))
        # Yes: just call the function without messing with esp.
@@ -568,7 +599,7 @@ define_c_label(within_c_stack)
        OP(push,l)      REG(ebp)                        # Save frame pointer
        OP(mov,l)       TW(REG(esp),REG(ebp))
        OP(mov,l)       TW(REG(eax),REG(esp))           # Switch to C stack
-       OP(mov,l)       TW(IMM(0),EVR(C_Stack_Pointer))
+       OP(mov,l)       TW(IMM(0),TLVR(C_Stack_Pointer))
        OP(push,l)      IMM(0)                          # Align sp to 16 bytes
        OP(push,l)      REG(ebp)                        # Save stack pointer
        OP(push,l)      LOF(HEX(c),REG(ebp))            # Push argument
@@ -578,7 +609,7 @@ define_debugging_label(within_c_stack_restore)
        OP(pop,l)       REG(eax)                        # Pop argument
        OP(mov,l)       TW(REG(esp),REG(eax))           # Restore C stack ptr
        OP(add,l)       TW(IMM(8),REG(eax))
-       OP(mov,l)       TW(REG(eax),EVR(C_Stack_Pointer))
+       OP(mov,l)       TW(REG(eax),TLVR(C_Stack_Pointer))
        OP(pop,l)       REG(esp)                        # Restore stack pointer
                                                        #   and switch back to
                                                        #   Scheme stack
@@ -603,15 +634,18 @@ define_c_label(C_to_interface)
        OP(push,l)      REG(ebx)
        OP(mov,l)       TW(LOF(8,REG(ebp)),REG(edx))    # Entry point
                                                        # Preserve frame ptr
-       OP(mov,l)       TW(REG(ebp),EVR(C_Frame_Pointer))
+       OP(mov,l)       TW(REG(ebp),TLVR(C_Frame_Pointer))
                                                        # Preserve stack ptr
-       OP(mov,l)       TW(REG(esp),EVR(C_Stack_Pointer))
+       OP(mov,l)       TW(REG(esp),TLVR(C_Stack_Pointer))
                                                        # Register block = %esi
                                                        # Scheme offset in NT
 
 ifdef(`WIN32',
 `      OP(mov,l)       TW(ABS(EVR(RegistersPtr)),regs)',
-`      OP(lea,l)       TW(ABS(EVR(Registers)),regs)')
+`ifdef(`ENABLE_SMP',
+`      OP(mov,l)       TW(%gs:0,regs)
+       OP(add,l)       TW($Registers\@ntpoff,regs)',
+`      OP(lea,l)       TW(ABS(EVR(Registers)),regs)')')
        jmp     EPFR(interface_to_scheme)
 
 define_hook_label(trampoline_to_interface)
@@ -630,8 +664,8 @@ define_debugging_label(scheme_to_interface)
 
 # These two moves must happen _before_ the ffree instructions below.
 # Otherwise recovery from SIGFPE there will fail.
-       OP(mov,l)       TW(REG(esp),EVR(stack_pointer))
-       OP(mov,l)       TW(rfree,EVR(Free))
+       OP(mov,l)       TW(REG(esp),TLVR(stack_pointer))
+       OP(mov,l)       TW(rfree,TLVR(Free))
 
 IF387(`
        OP(cmp,l)       TW(IMM(0),ABS(EVR(i387_presence)))
@@ -647,11 +681,11 @@ IF387(`
 scheme_to_interface_proceed:
 ')
 
-       OP(mov,l)       TW(EVR(C_Stack_Pointer),REG(esp))
-       OP(mov,l)       TW(EVR(C_Frame_Pointer),REG(ebp))
+       OP(mov,l)       TW(TLVR(C_Stack_Pointer),REG(esp))
+       OP(mov,l)       TW(TLVR(C_Frame_Pointer),REG(ebp))
 
        # Signal to within_c_stack that we are now in C land.
-       OP(mov,l)       TW(IMM(0),EVR(C_Stack_Pointer))
+       OP(mov,l)       TW(IMM(0),TLVR(C_Stack_Pointer))
 
        OP(sub,l)       TW(IMM(8),REG(esp))     # alloc struct return
 
@@ -689,14 +723,14 @@ IF387(`
        ffree   ST(7)
 interface_to_scheme_proceed:
 ')
-       OP(mov,l)       TW(EVR(Free),rfree)             # Free pointer = %edi
+       OP(mov,l)       TW(TLVR(Free),rfree)            # Free pointer = %edi
        OP(mov,l)       TW(LOF(REGBLOCK_VAL(),regs),REG(eax)) # Value/dynamic link
        OP(mov,l)       TW(IMM(ADDRESS_MASK),rmask)     # = %ebp
 
        # Restore the C stack pointer, which we zeroed back in
        # scheme_to_interface, for within_c_stack.
-       OP(mov,l)       TW(REG(esp),EVR(C_Stack_Pointer))
-       OP(mov,l)       TW(EVR(stack_pointer),REG(esp))
+       OP(mov,l)       TW(REG(esp),TLVR(C_Stack_Pointer))
+       OP(mov,l)       TW(TLVR(stack_pointer),REG(esp))
        OP(mov,l)       TW(REG(eax),REG(ecx))           # Preserve if used
        OP(and,l)       TW(rmask,REG(ecx))              # Restore potential dynamic link
        OP(mov,l)       TW(REG(ecx),LOF(REGBLOCK_DLINK(),regs))
@@ -889,13 +923,13 @@ set_interrupt_enables_memtop_1:
        # If GC is enabled, set memtop to the heap allocation limit.
        OP(test,l)      TW(IMM(INT_GC),REG(ecx))
        jz      set_interrupt_enables_memtop_2
-       OP(mov,l)       TW(ABS(EVR(heap_alloc_limit)),REG(edx))
+       OP(mov,l)       TW(ABS(TLVR(heap_alloc_limit)),REG(edx))
        jmp     set_interrupt_enables_set_memtop
 
 set_interrupt_enables_memtop_2:
        # Otherwise, there is no interrupt pending, and GC is not
        # enabled, so set memtop to the absolute heap end.
-       OP(mov,l)       TW(ABS(EVR(heap_end)),REG(edx))
+       OP(mov,l)       TW(ABS(TLVR(heap_end)),REG(edx))
 
 set_interrupt_enables_set_memtop:
        OP(mov,l)       TW(REG(edx),LOF(REGBLOCK_MEMTOP(),regs))
@@ -903,11 +937,11 @@ set_interrupt_enables_set_memtop:
 set_interrupt_enables_determine_stack_guard:
        OP(test,l)      TW(IMM(INT_Stack_Overflow),REG(ecx))
        jz      set_interrupt_enables_stack_guard_1
-       OP(mov,l)       TW(ABS(EVR(stack_guard)),REG(edx))
+       OP(mov,l)       TW(ABS(TLVR(stack_guard)),REG(edx))
        jmp     set_interrupt_enables_set_stack_guard
 
 set_interrupt_enables_stack_guard_1:
-       OP(mov,l)       TW(ABS(EVR(stack_start)),REG(edx))
+       OP(mov,l)       TW(ABS(TLVR(stack_start)),REG(edx))
 
 set_interrupt_enables_set_stack_guard:
        OP(mov,l)       TW(REG(edx),LOF(REGBLOCK_STACK_GUARD(),regs))
index cc628f2019527f7d29ddbee6f0c1101812325c7d..a3f8e7c8af3774630193392e531b74342f2abbd8 100644 (file)
@@ -238,12 +238,136 @@ store_trampoline_insns (insn_t * entry, byte_t code)
 #  define VM_PROT_SCHEME (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
 #endif
 
+static bool fp_support_present = false;
+/* Initialization hook for the i386 back end; i386.h defines
+   ASM_INIT_HOOK as this function.
+
+   Stores the result of i386_interface_initialize () in the file-static
+   fp_support_present, then hands the address and name of each
+   assembly-language entry point to declare_builtin.  Under _MACH_UNIX
+   it also checks the protection of the region containing Heap against
+   VM_PROT_SCHEME and calls Microcode_Termination (TERM_EXIT) when that
+   protection cannot be obtained.  */
+void
+i386_init_hook (void)
+{
+  fp_support_present = (i386_interface_initialize ());
+
+  declare_builtin ((unsigned long)asm_scheme_to_interface, "asm_scheme_to_interface");
+  declare_builtin ((unsigned long)asm_scheme_to_interface_call, "asm_scheme_to_interface_call");
+  declare_builtin ((unsigned long)asm_trampoline_to_interface, "asm_trampoline_to_interface");
+  declare_builtin ((unsigned long)asm_interrupt_procedure, "asm_interrupt_procedure");
+  declare_builtin ((unsigned long)asm_interrupt_continuation, "asm_interrupt_continuation");
+  declare_builtin ((unsigned long)asm_interrupt_closure, "asm_interrupt_closure");
+  declare_builtin ((unsigned long)asm_interrupt_dlink, "asm_interrupt_dlink");
+  declare_builtin ((unsigned long)asm_primitive_apply, "asm_primitive_apply");
+  declare_builtin ((unsigned long)asm_primitive_lexpr_apply, "asm_primitive_lexpr_apply");
+  declare_builtin ((unsigned long)asm_assignment_trap, "asm_assignment_trap");
+  declare_builtin ((unsigned long)asm_reference_trap, "asm_reference_trap");
+  declare_builtin ((unsigned long)asm_safe_reference_trap, "asm_safe_reference_trap");
+  declare_builtin ((unsigned long)asm_link, "asm_link");
+  declare_builtin ((unsigned long)asm_error, "asm_error");
+  declare_builtin ((unsigned long)asm_primitive_error, "asm_primitive_error");
+  declare_builtin ((unsigned long)asm_short_primitive_apply, "asm_short_primitive_apply");
+  if (fp_support_present)  /* Only one arithmetic family is declared: asm_generic_* or asm_nofp_*.  */
+    {
+      declare_builtin ((unsigned long)asm_generic_add, "asm_generic_add");
+      declare_builtin ((unsigned long)asm_generic_subtract, "asm_generic_subtract");
+      declare_builtin ((unsigned long)asm_generic_multiply, "asm_generic_multiply");
+      declare_builtin ((unsigned long)asm_generic_divide, "asm_generic_divide");
+      declare_builtin ((unsigned long)asm_generic_equal, "asm_generic_equal");
+      declare_builtin ((unsigned long)asm_generic_less, "asm_generic_less");
+      declare_builtin ((unsigned long)asm_generic_greater, "asm_generic_greater");
+      declare_builtin ((unsigned long)asm_generic_increment, "asm_generic_increment");
+      declare_builtin ((unsigned long)asm_generic_decrement, "asm_generic_decrement");
+      declare_builtin ((unsigned long)asm_generic_zero, "asm_generic_zero");
+      declare_builtin ((unsigned long)asm_generic_positive, "asm_generic_positive");
+      declare_builtin ((unsigned long)asm_generic_negative, "asm_generic_negative");
+      declare_builtin ((unsigned long)asm_generic_quotient, "asm_generic_quotient");
+      declare_builtin ((unsigned long)asm_generic_remainder, "asm_generic_remainder");
+      declare_builtin ((unsigned long)asm_generic_modulo, "asm_generic_modulo");
+    }
+  else
+    {
+      declare_builtin ((unsigned long)asm_nofp_add, "asm_nofp_add");
+      declare_builtin ((unsigned long)asm_nofp_subtract, "asm_nofp_subtract");
+      declare_builtin ((unsigned long)asm_nofp_multiply, "asm_nofp_multiply");
+      declare_builtin ((unsigned long)asm_nofp_divide, "asm_nofp_divide");
+      declare_builtin ((unsigned long)asm_nofp_equal, "asm_nofp_equal");
+      declare_builtin ((unsigned long)asm_nofp_less, "asm_nofp_less");
+      declare_builtin ((unsigned long)asm_nofp_greater, "asm_nofp_greater");
+      declare_builtin ((unsigned long)asm_nofp_increment, "asm_nofp_increment");
+      declare_builtin ((unsigned long)asm_nofp_decrement, "asm_nofp_decrement");
+      declare_builtin ((unsigned long)asm_nofp_zero, "asm_nofp_zero");
+      declare_builtin ((unsigned long)asm_nofp_positive, "asm_nofp_positive");
+      declare_builtin ((unsigned long)asm_nofp_negative, "asm_nofp_negative");
+      declare_builtin ((unsigned long)asm_nofp_quotient, "asm_nofp_quotient");
+      declare_builtin ((unsigned long)asm_nofp_remainder, "asm_nofp_remainder");
+      declare_builtin ((unsigned long)asm_nofp_modulo, "asm_nofp_modulo");
+    }
+  declare_builtin ((unsigned long)asm_sc_apply, "asm_sc_apply");
+  declare_builtin ((unsigned long)asm_sc_apply_size_1, "asm_sc_apply_size_1");
+  declare_builtin ((unsigned long)asm_sc_apply_size_2, "asm_sc_apply_size_2");
+  declare_builtin ((unsigned long)asm_sc_apply_size_3, "asm_sc_apply_size_3");
+  declare_builtin ((unsigned long)asm_sc_apply_size_4, "asm_sc_apply_size_4");
+  declare_builtin ((unsigned long)asm_sc_apply_size_5, "asm_sc_apply_size_5");
+  declare_builtin ((unsigned long)asm_sc_apply_size_6, "asm_sc_apply_size_6");
+  declare_builtin ((unsigned long)asm_sc_apply_size_7, "asm_sc_apply_size_7");
+  declare_builtin ((unsigned long)asm_sc_apply_size_8, "asm_sc_apply_size_8");
+  declare_builtin ((unsigned long)asm_interrupt_continuation_2, "asm_interrupt_continuation_2");
+  if (ia32_cpuid_needed)  /* extern int, declared in i386.h */
+    declare_builtin ((unsigned long)asm_serialize_cache, "asm_serialize_cache");
+  else
+    declare_builtin ((unsigned long)asm_dont_serialize_cache, "asm_dont_serialize_cache");
+  declare_builtin ((unsigned long)asm_fixnum_shift, "asm_fixnum_shift");
+  declare_builtin ((unsigned long)asm_set_interrupt_enables, "asm_set_interrupt_enables");
+  /* Mach only: check, and if necessary request, VM_PROT_SCHEME on the heap.  */
+#ifdef _MACH_UNIX
+  {
+    vm_address_t addr;
+    vm_size_t size;
+    vm_prot_t prot;
+    vm_prot_t max_prot;
+    vm_inherit_t inheritance;
+    boolean_t shared;
+    port_t object;
+    vm_offset_t offset;
+
+    addr = ((vm_address_t) Heap);  /* region query starts at Heap */
+    if ((vm_region ((task_self ()), &addr, &size, &prot, &max_prot,
+                   &inheritance, &shared, &object, &offset))
+       != KERN_SUCCESS)
+      {  /* NOTE(review): messages below still say "compiler_reset" although this code is now in i386_init_hook.  */
+       outf_fatal ( "compiler_reset: vm_region() failed.\n");
+       Microcode_Termination (TERM_EXIT);
+       /*NOTREACHED*/
+      }
+    if ((prot & VM_PROT_SCHEME) != VM_PROT_SCHEME)  /* not all VM_PROT_SCHEME bits currently granted */
+      {
+       if ((max_prot & VM_PROT_SCHEME) != VM_PROT_SCHEME)  /* nor allowed by max_prot: give up */
+         {
+           outf_fatal (
+                       "compiler_reset: inadequate protection for Heap.\n");
+           outf_fatal ( "maximum = 0x%lx; desired = 0x%lx\n",
+                       ((unsigned long) (max_prot & VM_PROT_SCHEME)),
+                       ((unsigned long) VM_PROT_SCHEME));
+           Microcode_Termination (TERM_EXIT);
+           /*NOTREACHED*/
+         }
+       if ((vm_protect ((task_self ()), ((vm_address_t) Heap),  /* covers Heap up to constant_end */
+                        (((char *) constant_end) - ((char *) Heap)),
+                        0, VM_PROT_SCHEME))
+           != KERN_SUCCESS)
+         {
+           outf_fatal ("Unable to change protection for Heap.\n");
+           outf_fatal ("actual = 0x%lx; desired = 0x%lx\n",
+                       ((unsigned long) (prot & VM_PROT_SCHEME)),
+                       ((unsigned long) VM_PROT_SCHEME));
+           Microcode_Termination (TERM_EXIT);
+           /*NOTREACHED*/
+         }
+      }
+  }
+#endif /* _MACH_UNIX */
+}
+
 #define SETUP_REGISTER(hook) do                                                \
 {                                                                      \
   (* ((unsigned long *) (esi_value + offset)))                         \
     = ((unsigned long) (hook));                                                \
   offset += (COMPILER_HOOK_SIZE * (sizeof (SCHEME_OBJECT)));           \
-  declare_builtin (((unsigned long) hook), #hook);                     \
 } while (0)
 
 void
@@ -251,7 +375,6 @@ i386_reset_hook (void)
 {
   unsigned int offset = (COMPILER_REGBLOCK_N_FIXED * (sizeof (SCHEME_OBJECT)));
   unsigned char * esi_value = ((unsigned char *) Registers);
-  bool fp_support_present = (i386_interface_initialize ());
 
   /* These must match machines/i386/lapgen.scm */
 
@@ -341,54 +464,6 @@ i386_reset_hook (void)
     SETUP_REGISTER (asm_dont_serialize_cache);         /* -7 */
   SETUP_REGISTER (asm_fixnum_shift);                   /* -6 */
   SETUP_REGISTER (asm_set_interrupt_enables);          /* -5 */
-
-#ifdef _MACH_UNIX
-  {
-    vm_address_t addr;
-    vm_size_t size;
-    vm_prot_t prot;
-    vm_prot_t max_prot;
-    vm_inherit_t inheritance;
-    boolean_t shared;
-    port_t object;
-    vm_offset_t offset;
-
-    addr = ((vm_address_t) Heap);
-    if ((vm_region ((task_self ()), &addr, &size, &prot, &max_prot,
-                   &inheritance, &shared, &object, &offset))
-       != KERN_SUCCESS)
-      {
-       outf_fatal ( "compiler_reset: vm_region() failed.\n");
-       Microcode_Termination (TERM_EXIT);
-       /*NOTREACHED*/
-      }
-    if ((prot & VM_PROT_SCHEME) != VM_PROT_SCHEME)
-      {
-       if ((max_prot & VM_PROT_SCHEME) != VM_PROT_SCHEME)
-         {
-           outf_fatal (
-                       "compiler_reset: inadequate protection for Heap.\n");
-           outf_fatal ( "maximum = 0x%lx; desired = 0x%lx\n",
-                       ((unsigned long) (max_prot & VM_PROT_SCHEME)),
-                       ((unsigned long) VM_PROT_SCHEME));
-           Microcode_Termination (TERM_EXIT);
-           /*NOTREACHED*/
-         }
-       if ((vm_protect ((task_self ()), ((vm_address_t) Heap),
-                        (((char *) constant_end) - ((char *) Heap)),
-                        0, VM_PROT_SCHEME))
-           != KERN_SUCCESS)
-         {
-           outf_fatal ("Unable to change protection for Heap.\n");
-           outf_fatal ("actual = 0x%lx; desired = 0x%lx\n",
-                       ((unsigned long) (prot & VM_PROT_SCHEME)),
-                       ((unsigned long) VM_PROT_SCHEME));
-           Microcode_Termination (TERM_EXIT);
-           /*NOTREACHED*/
-         }
-      }
-  }
-#endif /* _MACH_UNIX */
 }
 
 #ifndef HAVE_FENV_H
index 512a221e513303422ad3fb558969b77caa5f275c..30229327332c8b2096aae53e196fc507e2575a89 100644 (file)
@@ -182,6 +182,7 @@ magic = ([TC_COMPILED_ENTRY | 0] - (offset + length_of_CALL_instruction))
 
 */
 \f
+#define ASM_INIT_HOOK i386_init_hook
 #define ASM_RESET_HOOK i386_reset_hook
 
 #define CMPINT_USE_STRUCS 1
@@ -331,6 +332,7 @@ extern void start_closure_relocation (SCHEME_OBJECT *, reloc_ref_t *);
 extern insn_t * read_compiled_closure_target (insn_t *, reloc_ref_t *);
 extern void start_operator_relocation (SCHEME_OBJECT *, reloc_ref_t *);
 extern insn_t * read_uuo_target (SCHEME_OBJECT *, reloc_ref_t *);
+extern void i386_init_hook (void);
 extern void i386_reset_hook (void);
 
 extern int ia32_cpuid_needed;