From f09ede9e1805c1c46788a1da8246b4efe81f5c2a Mon Sep 17 00:00:00 2001 From: Matt Birkholz Date: Tue, 21 Jul 2015 00:27:11 -0700 Subject: [PATCH] Add i386 assembly to allocate and access thread-local variables. --- src/microcode/cmpauxmd/i386.m4 | 94 ++++++++++++------ src/microcode/cmpintmd/i386.c | 175 +++++++++++++++++++++++---------- src/microcode/cmpintmd/i386.h | 2 + 3 files changed, 191 insertions(+), 80 deletions(-) diff --git a/src/microcode/cmpauxmd/i386.m4 b/src/microcode/cmpauxmd/i386.m4 index 2eaf3672a..06cf9b9b9 100644 --- a/src/microcode/cmpauxmd/i386.m4 +++ b/src/microcode/cmpauxmd/i386.m4 @@ -41,7 +41,7 @@ ### 2) The C compiler divides registers into three groups: ### - Linkage registers, used for procedure calls and global ### references. On i386 (gcc and Zortech C): %ebp, %esp. -### - super temporaries, not preserved accross procedure calls and +### - super temporaries, not preserved across procedure calls and ### always usable. On i386 (gcc and Zortech C): %eax, %edx, %ecx. ### - preserved registers saved by the callee if they are written. ### On i386 (gcc and Zortech C): all others (%ebx, %esi, %edi). @@ -176,6 +176,9 @@ IFNDASM(`define(popfd,`popf')') ifdef(`SUPPRESS_LEADING_UNDERSCORE', `define(EVR,`$1')', `define(EVR,`_$1')') +ifdef(`ENABLE_SMP', + `define(TLVR,`%gs:$1\@ntpoff')', + `define(TLVR,`EVR($1)')') # When using the Watcom C compiler with register-based calling # conventions, source-code function names normally expand to `FOO_', @@ -350,6 +353,38 @@ define(rmask,REG(ebp)) IFDASM(`.586p .model flat') +use_external_data(EVR(utility_table)) + +ifdef(`ENABLE_SMP',` + .section .tbss,"awT",\@nobits + .align 4 +define(`define_tllong',` + .globl $1 + .type $1 STT_TLS + .size $1, 4 +$1: + .zero 4 +') + define_tllong(Free) + define_tllong(heap_alloc_limit) + define_tllong(heap_end) + define_tllong(stack_guard) + define_tllong(stack_pointer) + define_tllong(stack_start) + define_tllong(C_Stack_Pointer) + define_tllong(C_Frame_Pointer) + + .zero 128 + .globl Registers + .type Registers STT_TLS + .size Registers, eval(REGBLOCK_SIZE_IN_OBJECTS*4) +Registers: + .zero eval(REGBLOCK_SIZE_IN_OBJECTS*4) +',` +ifdef(`WIN32',` +use_external_data(EVR(RegistersPtr)) +',` + DECLARE_DATA_SEGMENT() declare_alignment(2) @@ -359,29 +394,25 @@ use_external_data(EVR(heap_end)) use_external_data(EVR(stack_guard)) use_external_data(EVR(stack_pointer)) use_external_data(EVR(stack_start)) -use_external_data(EVR(utility_table)) -ifdef(`WIN32',` -use_external_data(EVR(RegistersPtr)) -',` define_data(Regstart) allocate_space(Regstart,128) define_data(Registers) allocate_space(Registers,eval(REGBLOCK_SIZE_IN_OBJECTS*4)) -') - -define_data(i387_presence) -allocate_longword(i387_presence) - -define_data(sse_presence) -allocate_longword(sse_presence) define_data(C_Stack_Pointer) allocate_longword(C_Stack_Pointer) define_data(C_Frame_Pointer) allocate_longword(C_Frame_Pointer) +')') + +define_data(i387_presence) +allocate_longword(i387_presence) + +define_data(sse_presence) +allocate_longword(sse_presence) define_data(ia32_cpuid_supported) allocate_longword(ia32_cpuid_supported) @@ -559,7 +590,7 @@ no_cpuid_instr: # set esp to something funny. define_c_label(within_c_stack) - OP(mov,l) TW(EVR(C_Stack_Pointer),REG(eax)) + OP(mov,l) TW(TLVR(C_Stack_Pointer),REG(eax)) # Are we currently in C, signalled by having no saved C stack pointer? OP(cmp,l) TW(IMM(0),REG(eax)) # Yes: just call the function without messing with esp. @@ -568,7 +599,7 @@ define_c_label(within_c_stack) OP(push,l) REG(ebp) # Save frame pointer OP(mov,l) TW(REG(esp),REG(ebp)) OP(mov,l) TW(REG(eax),REG(esp)) # Switch to C stack - OP(mov,l) TW(IMM(0),EVR(C_Stack_Pointer)) + OP(mov,l) TW(IMM(0),TLVR(C_Stack_Pointer)) OP(push,l) IMM(0) # Align sp to 16 bytes OP(push,l) REG(ebp) # Save stack pointer OP(push,l) LOF(HEX(c),REG(ebp)) # Push argument @@ -578,7 +609,7 @@ define_debugging_label(within_c_stack_restore) OP(pop,l) REG(eax) # Pop argument OP(mov,l) TW(REG(esp),REG(eax)) # Restore C stack ptr OP(add,l) TW(IMM(8),REG(eax)) - OP(mov,l) TW(REG(eax),EVR(C_Stack_Pointer)) + OP(mov,l) TW(REG(eax),TLVR(C_Stack_Pointer)) OP(pop,l) REG(esp) # Restore stack pointer # and switch back to # Scheme stack @@ -603,15 +634,18 @@ define_c_label(C_to_interface) OP(push,l) REG(ebx) OP(mov,l) TW(LOF(8,REG(ebp)),REG(edx)) # Entry point # Preserve frame ptr - OP(mov,l) TW(REG(ebp),EVR(C_Frame_Pointer)) + OP(mov,l) TW(REG(ebp),TLVR(C_Frame_Pointer)) # Preserve stack ptr - OP(mov,l) TW(REG(esp),EVR(C_Stack_Pointer)) + OP(mov,l) TW(REG(esp),TLVR(C_Stack_Pointer)) # Register block = %esi # Scheme offset in NT ifdef(`WIN32', ` OP(mov,l) TW(ABS(EVR(RegistersPtr)),regs)', -` OP(lea,l) TW(ABS(EVR(Registers)),regs)') +`ifdef(`ENABLE_SMP', +` OP(mov,l) TW(%gs:0,regs) + OP(add,l) TW($Registers\@ntpoff,regs)', +` OP(lea,l) TW(ABS(EVR(Registers)),regs)')') jmp EPFR(interface_to_scheme) define_hook_label(trampoline_to_interface) @@ -630,8 +664,8 @@ define_debugging_label(scheme_to_interface) # These two moves must happen _before_ the ffree instructions below. # Otherwise recovery from SIGFPE there will fail. - OP(mov,l) TW(REG(esp),EVR(stack_pointer)) - OP(mov,l) TW(rfree,EVR(Free)) + OP(mov,l) TW(REG(esp),TLVR(stack_pointer)) + OP(mov,l) TW(rfree,TLVR(Free)) IF387(` OP(cmp,l) TW(IMM(0),ABS(EVR(i387_presence))) @@ -647,11 +681,11 @@ IF387(` scheme_to_interface_proceed: ') - OP(mov,l) TW(EVR(C_Stack_Pointer),REG(esp)) - OP(mov,l) TW(EVR(C_Frame_Pointer),REG(ebp)) + OP(mov,l) TW(TLVR(C_Stack_Pointer),REG(esp)) + OP(mov,l) TW(TLVR(C_Frame_Pointer),REG(ebp)) # Signal to within_c_stack that we are now in C land. - OP(mov,l) TW(IMM(0),EVR(C_Stack_Pointer)) + OP(mov,l) TW(IMM(0),TLVR(C_Stack_Pointer)) OP(sub,l) TW(IMM(8),REG(esp)) # alloc struct return @@ -689,14 +723,14 @@ IF387(` ffree ST(7) interface_to_scheme_proceed: ') - OP(mov,l) TW(EVR(Free),rfree) # Free pointer = %edi + OP(mov,l) TW(TLVR(Free),rfree) # Free pointer = %edi OP(mov,l) TW(LOF(REGBLOCK_VAL(),regs),REG(eax)) # Value/dynamic link OP(mov,l) TW(IMM(ADDRESS_MASK),rmask) # = %ebp # Restore the C stack pointer, which we zeroed back in # scheme_to_interface, for within_c_stack. - OP(mov,l) TW(REG(esp),EVR(C_Stack_Pointer)) - OP(mov,l) TW(EVR(stack_pointer),REG(esp)) + OP(mov,l) TW(REG(esp),TLVR(C_Stack_Pointer)) + OP(mov,l) TW(TLVR(stack_pointer),REG(esp)) OP(mov,l) TW(REG(eax),REG(ecx)) # Preserve if used OP(and,l) TW(rmask,REG(ecx)) # Restore potential dynamic link OP(mov,l) TW(REG(ecx),LOF(REGBLOCK_DLINK(),regs)) @@ -889,13 +923,13 @@ set_interrupt_enables_memtop_1: # If GC is enabled, set memtop to the heap allocation limit. OP(test,l) TW(IMM(INT_GC),REG(ecx)) jz set_interrupt_enables_memtop_2 - OP(mov,l) TW(ABS(EVR(heap_alloc_limit)),REG(edx)) + OP(mov,l) TW(ABS(TLVR(heap_alloc_limit)),REG(edx)) jmp set_interrupt_enables_set_memtop set_interrupt_enables_memtop_2: # Otherwise, there is no interrupt pending, and GC is not # enabled, so set memtop to the absolute heap end. - OP(mov,l) TW(ABS(EVR(heap_end)),REG(edx)) + OP(mov,l) TW(ABS(TLVR(heap_end)),REG(edx)) set_interrupt_enables_set_memtop: OP(mov,l) TW(REG(edx),LOF(REGBLOCK_MEMTOP(),regs)) @@ -903,11 +937,11 @@ set_interrupt_enables_set_memtop: set_interrupt_enables_determine_stack_guard: OP(test,l) TW(IMM(INT_Stack_Overflow),REG(ecx)) jz set_interrupt_enables_stack_guard_1 - OP(mov,l) TW(ABS(EVR(stack_guard)),REG(edx)) + OP(mov,l) TW(ABS(TLVR(stack_guard)),REG(edx)) jmp set_interrupt_enables_set_stack_guard set_interrupt_enables_stack_guard_1: - OP(mov,l) TW(ABS(EVR(stack_start)),REG(edx)) + OP(mov,l) TW(ABS(TLVR(stack_start)),REG(edx)) set_interrupt_enables_set_stack_guard: OP(mov,l) TW(REG(edx),LOF(REGBLOCK_STACK_GUARD(),regs)) diff --git a/src/microcode/cmpintmd/i386.c b/src/microcode/cmpintmd/i386.c index cc628f201..a3f8e7c8a 100644 --- a/src/microcode/cmpintmd/i386.c +++ b/src/microcode/cmpintmd/i386.c @@ -238,12 +238,136 @@ store_trampoline_insns (insn_t * entry, byte_t code) # define VM_PROT_SCHEME (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE) #endif +static bool fp_support_present = false; + +void +i386_init_hook (void) +{ + fp_support_present = (i386_interface_initialize ()); + + declare_builtin ((unsigned long)asm_scheme_to_interface, "asm_scheme_to_interface"); + declare_builtin ((unsigned long)asm_scheme_to_interface_call, "asm_scheme_to_interface_call"); + declare_builtin ((unsigned long)asm_trampoline_to_interface, "asm_trampoline_to_interface"); + declare_builtin ((unsigned long)asm_interrupt_procedure, "asm_interrupt_procedure"); + declare_builtin ((unsigned long)asm_interrupt_continuation, "asm_interrupt_continuation"); + declare_builtin ((unsigned long)asm_interrupt_closure, "asm_interrupt_closure"); + declare_builtin ((unsigned long)asm_interrupt_dlink, "asm_interrupt_dlink"); + declare_builtin ((unsigned long)asm_primitive_apply, "asm_primitive_apply"); + declare_builtin ((unsigned long)asm_primitive_lexpr_apply, "asm_primitive_lexpr_apply"); + declare_builtin ((unsigned long)asm_assignment_trap, "asm_assignment_trap"); + declare_builtin ((unsigned long)asm_reference_trap, "asm_reference_trap"); + declare_builtin ((unsigned long)asm_safe_reference_trap, "asm_safe_reference_trap"); + declare_builtin ((unsigned long)asm_link, "asm_link"); + declare_builtin ((unsigned long)asm_error, "asm_error"); + declare_builtin ((unsigned long)asm_primitive_error, "asm_primitive_error"); + declare_builtin ((unsigned long)asm_short_primitive_apply, "asm_short_primitive_apply"); + if (fp_support_present) + { + declare_builtin ((unsigned long)asm_generic_add, "asm_generic_add"); + declare_builtin ((unsigned long)asm_generic_subtract, "asm_generic_subtract"); + declare_builtin ((unsigned long)asm_generic_multiply, "asm_generic_multiply"); + declare_builtin ((unsigned long)asm_generic_divide, "asm_generic_divide"); + declare_builtin ((unsigned long)asm_generic_equal, "asm_generic_equal"); + declare_builtin ((unsigned long)asm_generic_less, "asm_generic_less"); + declare_builtin ((unsigned long)asm_generic_greater, "asm_generic_greater"); + declare_builtin ((unsigned long)asm_generic_increment, "asm_generic_increment"); + declare_builtin ((unsigned long)asm_generic_decrement, "asm_generic_decrement"); + declare_builtin ((unsigned long)asm_generic_zero, "asm_generic_zero"); + declare_builtin ((unsigned long)asm_generic_positive, "asm_generic_positive"); + declare_builtin ((unsigned long)asm_generic_negative, "asm_generic_negative"); + declare_builtin ((unsigned long)asm_generic_quotient, "asm_generic_quotient"); + declare_builtin ((unsigned long)asm_generic_remainder, "asm_generic_remainder"); + declare_builtin ((unsigned long)asm_generic_modulo, "asm_generic_modulo"); + } + else + { + declare_builtin ((unsigned long)asm_nofp_add, "asm_nofp_add"); + declare_builtin ((unsigned long)asm_nofp_subtract, "asm_nofp_subtract"); + declare_builtin ((unsigned long)asm_nofp_multiply, "asm_nofp_multiply"); + declare_builtin ((unsigned long)asm_nofp_divide, "asm_nofp_divide"); + declare_builtin ((unsigned long)asm_nofp_equal, "asm_nofp_equal"); + declare_builtin ((unsigned long)asm_nofp_less, "asm_nofp_less"); + declare_builtin ((unsigned long)asm_nofp_greater, "asm_nofp_greater"); + declare_builtin ((unsigned long)asm_nofp_increment, "asm_nofp_increment"); + declare_builtin ((unsigned long)asm_nofp_decrement, "asm_nofp_decrement"); + declare_builtin ((unsigned long)asm_nofp_zero, "asm_nofp_zero"); + declare_builtin ((unsigned long)asm_nofp_positive, "asm_nofp_positive"); + declare_builtin ((unsigned long)asm_nofp_negative, "asm_nofp_negative"); + declare_builtin ((unsigned long)asm_nofp_quotient, "asm_nofp_quotient"); + declare_builtin ((unsigned long)asm_nofp_remainder, "asm_nofp_remainder"); + declare_builtin ((unsigned long)asm_nofp_modulo, "asm_nofp_modulo"); + } + declare_builtin ((unsigned long)asm_sc_apply, "asm_sc_apply"); + declare_builtin ((unsigned long)asm_sc_apply_size_1, "asm_sc_apply_size_1"); + declare_builtin ((unsigned long)asm_sc_apply_size_2, "asm_sc_apply_size_2"); + declare_builtin ((unsigned long)asm_sc_apply_size_3, "asm_sc_apply_size_3"); + declare_builtin ((unsigned long)asm_sc_apply_size_4, "asm_sc_apply_size_4"); + declare_builtin ((unsigned long)asm_sc_apply_size_5, "asm_sc_apply_size_5"); + declare_builtin ((unsigned long)asm_sc_apply_size_6, "asm_sc_apply_size_6"); + declare_builtin ((unsigned long)asm_sc_apply_size_7, "asm_sc_apply_size_7"); + declare_builtin ((unsigned long)asm_sc_apply_size_8, "asm_sc_apply_size_8"); + declare_builtin ((unsigned long)asm_interrupt_continuation_2, "asm_interrupt_continuation_2"); + if (ia32_cpuid_needed) + declare_builtin ((unsigned long)asm_serialize_cache, "asm_serialize_cache"); + else + declare_builtin ((unsigned long)asm_dont_serialize_cache, "asm_dont_serialize_cache"); + declare_builtin ((unsigned long)asm_fixnum_shift, "asm_fixnum_shift"); + declare_builtin ((unsigned long)asm_set_interrupt_enables, "asm_set_interrupt_enables"); + +#ifdef _MACH_UNIX + { + vm_address_t addr; + vm_size_t size; + vm_prot_t prot; + vm_prot_t max_prot; + vm_inherit_t inheritance; + boolean_t shared; + port_t object; + vm_offset_t offset; + + addr = ((vm_address_t) Heap); + if ((vm_region ((task_self ()), &addr, &size, &prot, &max_prot, + &inheritance, &shared, &object, &offset)) + != KERN_SUCCESS) + { + outf_fatal ( "compiler_reset: vm_region() failed.\n"); + Microcode_Termination (TERM_EXIT); + /*NOTREACHED*/ + } + if ((prot & VM_PROT_SCHEME) != VM_PROT_SCHEME) + { + if ((max_prot & VM_PROT_SCHEME) != VM_PROT_SCHEME) + { + outf_fatal ( + "compiler_reset: inadequate protection for Heap.\n"); + outf_fatal ( "maximum = 0x%lx; desired = 0x%lx\n", + ((unsigned long) (max_prot & VM_PROT_SCHEME)), + ((unsigned long) VM_PROT_SCHEME)); + Microcode_Termination (TERM_EXIT); + /*NOTREACHED*/ + } + if ((vm_protect ((task_self ()), ((vm_address_t) Heap), + (((char *) constant_end) - ((char *) Heap)), + 0, VM_PROT_SCHEME)) + != KERN_SUCCESS) + { + outf_fatal ("Unable to change protection for Heap.\n"); + outf_fatal ("actual = 0x%lx; desired = 0x%lx\n", + ((unsigned long) (prot & VM_PROT_SCHEME)), + ((unsigned long) VM_PROT_SCHEME)); + Microcode_Termination (TERM_EXIT); + /*NOTREACHED*/ + } + } + } +#endif /* _MACH_UNIX */ +} + #define SETUP_REGISTER(hook) do \ { \ (* ((unsigned long *) (esi_value + offset))) \ = ((unsigned long) (hook)); \ offset += (COMPILER_HOOK_SIZE * (sizeof (SCHEME_OBJECT))); \ - declare_builtin (((unsigned long) hook), #hook); \ } while (0) void @@ -251,7 +375,6 @@ i386_reset_hook (void) { unsigned int offset = (COMPILER_REGBLOCK_N_FIXED * (sizeof (SCHEME_OBJECT))); unsigned char * esi_value = ((unsigned char *) Registers); - bool fp_support_present = (i386_interface_initialize ()); /* These must match machines/i386/lapgen.scm */ @@ -341,54 +464,6 @@ i386_reset_hook (void) SETUP_REGISTER (asm_dont_serialize_cache); /* -7 */ SETUP_REGISTER (asm_fixnum_shift); /* -6 */ SETUP_REGISTER (asm_set_interrupt_enables); /* -5 */ - -#ifdef _MACH_UNIX - { - vm_address_t addr; - vm_size_t size; - vm_prot_t prot; - vm_prot_t max_prot; - vm_inherit_t inheritance; - boolean_t shared; - port_t object; - vm_offset_t offset; - - addr = ((vm_address_t) Heap); - if ((vm_region ((task_self ()), &addr, &size, &prot, &max_prot, - &inheritance, &shared, &object, &offset)) - != KERN_SUCCESS) - { - outf_fatal ( "compiler_reset: vm_region() failed.\n"); - Microcode_Termination (TERM_EXIT); - /*NOTREACHED*/ - } - if ((prot & VM_PROT_SCHEME) != VM_PROT_SCHEME) - { - if ((max_prot & VM_PROT_SCHEME) != VM_PROT_SCHEME) - { - outf_fatal ( - "compiler_reset: inadequate protection for Heap.\n"); - outf_fatal ( "maximum = 0x%lx; desired = 0x%lx\n", - ((unsigned long) (max_prot & VM_PROT_SCHEME)), - ((unsigned long) VM_PROT_SCHEME)); - Microcode_Termination (TERM_EXIT); - /*NOTREACHED*/ - } - if ((vm_protect ((task_self ()), ((vm_address_t) Heap), - (((char *) constant_end) - ((char *) Heap)), - 0, VM_PROT_SCHEME)) - != KERN_SUCCESS) - { - outf_fatal ("Unable to change protection for Heap.\n"); - outf_fatal ("actual = 0x%lx; desired = 0x%lx\n", - ((unsigned long) (prot & VM_PROT_SCHEME)), - ((unsigned long) VM_PROT_SCHEME)); - Microcode_Termination (TERM_EXIT); - /*NOTREACHED*/ - } - } - } -#endif /* _MACH_UNIX */ } #ifndef HAVE_FENV_H diff --git a/src/microcode/cmpintmd/i386.h b/src/microcode/cmpintmd/i386.h index 512a221e5..302293273 100644 --- a/src/microcode/cmpintmd/i386.h +++ b/src/microcode/cmpintmd/i386.h @@ -182,6 +182,7 @@ magic = ([TC_COMPILED_ENTRY | 0] - (offset + length_of_CALL_instruction)) */ +#define ASM_INIT_HOOK i386_init_hook #define ASM_RESET_HOOK i386_reset_hook #define CMPINT_USE_STRUCS 1 @@ -331,6 +332,7 @@ extern void start_closure_relocation (SCHEME_OBJECT *, reloc_ref_t *); extern insn_t * read_compiled_closure_target (insn_t *, reloc_ref_t *); extern void start_operator_relocation (SCHEME_OBJECT *, reloc_ref_t *); extern insn_t * read_uuo_target (SCHEME_OBJECT *, reloc_ref_t *); +extern void i386_init_hook (void); extern void i386_reset_hook (void); extern int ia32_cpuid_needed; -- 2.25.1