From f2384feb368d44e9b80a789239e8d3b1ad4df689 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 3 Jan 2019 19:10:45 +0000 Subject: [PATCH] Open-code with-interrupt-mask, with-interrupts-reduced. Not open-coded at the RTL level, but at the LAP level. This way we avoid going through a return trampoline, which wrecks the return address stack branch target predictor as long as we transition between Scheme and C to handle trampolines. Most of the work, of munging MEMTOP and STACK_GUARD, is relegated to an assembly hook subroutine so the code doesn't expand too much. The format of the stack still uses reflect-to-interface so that this should require no changes to the continuation parser to get the interrupt masks right, but with an intermediate empty-frame continuation that actually calls the assembly hook and then pops reflect-to-interface off. --- src/compiler/machines/x86-64/lapgen.scm | 14 ++- src/compiler/machines/x86-64/machin.scm | 2 + src/compiler/machines/x86-64/rules1.scm | 7 +- src/compiler/machines/x86-64/rules3.scm | 124 ++++++++++++++++++++++++ src/microcode/cmpauxmd/x86-64.m4 | 67 +++++++++++++ src/microcode/cmpint.c | 5 +- src/microcode/cmpintmd/x86-64.c | 2 + src/microcode/cmpintmd/x86-64.h | 1 + 8 files changed, 217 insertions(+), 5 deletions(-) diff --git a/src/compiler/machines/x86-64/lapgen.scm b/src/compiler/machines/x86-64/lapgen.scm index 44241a11a..4b007dd4e 100644 --- a/src/compiler/machines/x86-64/lapgen.scm +++ b/src/compiler/machines/x86-64/lapgen.scm @@ -683,6 +683,17 @@ USA. (offset-reference regnum:regs-pointer register-block/stack-guard-offset)) +(define reg:int-mask + (offset-reference regnum:regs-pointer + register-block/int-mask-offset)) + +(define reg:int-code + (offset-reference regnum:regs-pointer + register-block/int-code-offset)) + +(define reg:reflect-to-interface + (offset-reference regnum:regs-pointer + register-block/reflect-to-interface-offset)) (define-syntax define-codes (sc-macro-transformer @@ -798,7 +809,8 @@ USA. apply-setup-size-5 apply-setup-size-6 apply-setup-size-7 - apply-setup-size-8) + apply-setup-size-8 + set-interrupt-enables!) ;; Operation tables diff --git a/src/compiler/machines/x86-64/machin.scm b/src/compiler/machines/x86-64/machin.scm index 96a1cfe92..a8078859d 100644 --- a/src/compiler/machines/x86-64/machin.scm +++ b/src/compiler/machines/x86-64/machin.scm @@ -228,6 +228,8 @@ USA. (define-integrable register-block/dynamic-link-offset 4) ; compiler temp (define-integrable register-block/lexpr-primitive-arity-offset 7) (define-integrable register-block/stack-guard-offset 11) +(define-integrable register-block/int-code-offset 12) +(define-integrable register-block/reflect-to-interface-offset 13) (define-integrable (fits-in-signed-byte? value) (<= #x-80 value #x7f)) diff --git a/src/compiler/machines/x86-64/rules1.scm b/src/compiler/machines/x86-64/rules1.scm index 899b89f1b..1a4f764ec 100644 --- a/src/compiler/machines/x86-64/rules1.scm +++ b/src/compiler/machines/x86-64/rules1.scm @@ -112,13 +112,16 @@ USA. (assign-register->register target datum) (affix-type (standard-move-to-target! datum target) type))) -(define (affix-type target type) +(define (affix-type target type #!optional get-temporary) (if (= 1 (bit-count type)) (let ((bit (first-set-bit type))) (assert (<= 0 bit)) (assert (< bit scheme-type-width)) (LAP (BTS Q ,target (&U ,(+ scheme-datum-width bit))))) - (let ((temp (temporary-register-reference))) + (let ((temp + (if (default-object? get-temporary) + (temporary-register-reference) + (get-temporary)))) (LAP (MOV Q ,temp (&U ,(make-non-pointer-literal type 0))) (OR Q ,target ,temp))))) diff --git a/src/compiler/machines/x86-64/rules3.scm b/src/compiler/machines/x86-64/rules3.scm index c240ae356..ea9cbc64d 100644 --- a/src/compiler/machines/x86-64/rules3.scm +++ b/src/compiler/machines/x86-64/rules3.scm @@ -219,6 +219,130 @@ USA. (LAP (MOV Q (R ,rdx) (&U ,frame-size)) ,@(invoke-interface code:compiler-apply)))))))) +;; Must match enum reflect_code_t in microcode/cmpint.c. +(define-integrable reflect-code:internal-apply 0) +(define-integrable reflect-code:restore-interrupt-mask 1) +(define-integrable reflect-code:stack-marker 2) +(define-integrable reflect-code:compiled-code-bkpt 3) +(define-integrable reflect-code:apply-compiled 6) + +(define-rule statement + (INVOCATION:PRIMITIVE (? frame-size) (? continuation) (? primitive)) + (QUALIFIER (eq? primitive (ucode-primitive set-interrupt-enables! 1))) + continuation + (assert (= frame-size 2)) + (let* ((prefix (clear-map!)) + (interrupt (generate-label 'INTERRUPT))) + (LAP ,@prefix + ;; Load new interrupt mask into rdx. + (POP Q (R ,rdx)) ;rdx := new interrupt mask + ;; Return value in rax is old interrupt mask. + (MOV Q (R ,rax) ;rax := old interrupt mask, tagged + (&U ,(make-non-pointer-literal (ucode-type fixnum) 0))) + (OR Q (R ,rax) ,reg:int-mask) + ;; Set the new interrupt mask. (Preserves rax.) + ,@(invoke-hook/subroutine entry:compiler-set-interrupt-enables!) + ;; Interrupts may now be enabled that weren't before, so check. + (CMP Q (R ,regnum:free-pointer) ,reg:compiled-memtop) + (JGE (@PCR ,interrupt)) + ;; Pop-return. Return value is in rax. + (AND Q (@R ,rsp) (R ,regnum:datum-mask)) + (RET) + (LABEL ,interrupt) + ,@(invoke-hook entry:compiler-interrupt-continuation-2)))) + +(define-rule statement + (INVOCATION:PRIMITIVE (? frame-size) (? continuation) (? primitive)) + (QUALIFIER + (or (eq? primitive (ucode-primitive with-interrupt-mask 2)) + (eq? primitive (ucode-primitive with-interrupts-reduced 2)))) + continuation + (assert (= frame-size 3)) + (let* ((prefix (clear-map!)) + (restore (generate-label 'RESTORE-INTERRUPTS)) + (pushed (generate-label 'PUSHED)) + (interrupt (generate-label 'INTERRUPT)) + (tag-continuation + (affix-type (INST-EA (@R ,rsp)) + type-code:compiled-return + (lambda () rax)))) + ;; Stack initially looks like: + ;; + ;; rsp[0] = new-mask + ;; rsp[1] = procedure + ;; + ;; Registers: + ;; - rbx: procedure, for apply-setup + ;; - rdx: new mask + ;; - rcx: fixnum tag + ;; - rax: continuation tag; jump target; return value. + ;; + (LAP ,@prefix + (POP Q (R ,rdx)) ;rdx := new-mask + (POP Q (R ,rbx)) ;rbx := procedure, for apply-setup + (MOV Q (R ,rcx) ;rcx := fixnum tag, for convenience + (&U ,(make-non-pointer-literal (ucode-type fixnum) 0))) + ;; Push reflect-to-interface(restore-interrupt-mask, old-mask) + ;; for the benefit of the continuation parser. + (PUSH Q ,reg:int-mask) + (OR Q (@R ,rsp) (R ,rcx)) + (PUSH Q (& ,reflect-code:restore-interrupt-mask)) + (OR Q (@R ,rsp) (R ,rcx)) + (PUSH Q ,reg:reflect-to-interface) + ;; Push a continuation onto the stack. + (CALL (@PCR ,pushed)) + (JMP (@PCR ,restore)) + (LABEL ,pushed) + ,@tag-continuation + ;; Push old mask argument. + (PUSH Q ,reg:int-mask) + (OR Q (@R ,rsp) (R ,rcx)) + + ;; Set new interrupt mask. It is tempting to just AND the new + ;; mask into the register for with-interrupts-reduced, but if + ;; we're disabling GC or stack overflow interrupts we also + ;; need to set MEMTOP and STACK_GUARD. + ,@(if (eq? primitive (ucode-primitive with-interrupts-reduced)) + (LAP (AND Q (R ,rdx) ,reg:int-mask)) + (LAP)) + ,@(invoke-hook/subroutine entry:compiler-set-interrupt-enables!) + ;; Apply the procedure in rbx. Stack now looks like: + ;; + ;; rsp[0] = new-mask + ;; rsp[1] = continuation + ;; rsp[2] = reflect-to-interface + ;; rsp[3] = reflect-code:restore-interrupt-mask + ;; rsp[4] = old-mask + ;; rsp[5] = continuation* + ;; + ;; Apply with a frame of size 2 = 1 (procedure) + 1 argument. + ;; Hook sets rax to the jump target -- either the compiled + ;; entry, or another hook to fall back to the interpreter. + ,@(invoke-hook/subroutine entry:compiler-apply-setup-size-2) + (JMP (R ,rax)) + ,@(make-external-label (continuation-code-word #f) restore) + ;; Return value in rax, so don't overwrite it. Stack now + ;; looks like: + ;; + ;; rsp[0] = reflect-to-interface + ;; rsp[1] = reflect-code:restore-interrupt-mask + ;; rsp[2] = old-mask + ;; rsp[3] = continuation* + ;; + ;; Pop reflect-to-interface -- we won't actually use it. + (ADD Q (R ,rsp) (& #x10)) + ;; Restore interrupts mask. + (POP Q (R ,rdx)) + ,@(invoke-hook/subroutine entry:compiler-set-interrupt-enables!) + ;; Interrupts may be unmasked now, so check. + (CMP Q (R ,regnum:free-pointer) ,reg:compiled-memtop) + (JGE (@PCR ,interrupt)) + ;; Pop-return. + (AND Q (@R ,rsp) (R ,regnum:datum-mask)) + (RET) + (LABEL ,interrupt) + ,@(invoke-hook entry:compiler-interrupt-continuation-2)))) + (let-syntax ((define-primitive-invocation (sc-macro-transformer diff --git a/src/microcode/cmpauxmd/x86-64.m4 b/src/microcode/cmpauxmd/x86-64.m4 index a5eb18e48..336b98291 100644 --- a/src/microcode/cmpauxmd/x86-64.m4 +++ b/src/microcode/cmpauxmd/x86-64.m4 @@ -309,9 +309,17 @@ define(IMM_TRUE, `IMM(HEX(2000000000000000))') define(FLONUM_DATA_OFFSET,8) define(FLONUM_STORAGE_SIZE,16) +define(INT_Stack_Overflow,HEX(1)) +define(INT_GC,HEX(4)) +define(INT_Mask,HEX(ffff)) + +define(REGBLOCK_MEMTOP,0) +define(REGBLOCK_INT_MASK,8) define(REGBLOCK_VAL,16) define(REGBLOCK_COMPILER_TEMP,32) define(REGBLOCK_DLINK,REGBLOCK_COMPILER_TEMP) +define(REGBLOCK_STACK_GUARD,88) +define(REGBLOCK_INT_CODE,96) define(regs,REG(rsi)) define(rfree,REG(rdi)) @@ -324,7 +332,11 @@ DECLARE_DATA_SEGMENT() declare_alignment(2) use_external_data(EVR(Free)) +use_external_data(EVR(heap_alloc_limit)) +use_external_data(EVR(heap_end)) +use_external_data(EVR(stack_guard)) use_external_data(EVR(stack_pointer)) +use_external_data(EVR(stack_start)) use_external_data(EVR(utility_table)) ifdef(`WIN32',` @@ -694,6 +706,61 @@ define_apply_fixed_size(5) define_apply_fixed_size(6) define_apply_fixed_size(7) define_apply_fixed_size(8) + +# Interrupt checks hook. + +define_hook_label(set_interrupt_enables) + # stack: untagged return address + # rax = return value, must preserve + # rdx = tagged interrupt mask on entry, garbage on exit + # rcx = temp + + # Mask off tag and other crud and set the interpreter register. + OP(and,q) TW(IMM(INT_Mask),REG(rdx)) + OP(mov,q) TW(REG(rdx),QOF(REGBLOCK_INT_MASK(),regs)) + + # This logic more or less follows COMPILER_SETUP_INTERRUPT. + +define_debugging_label(set_interrupt_enables_determine_memtop) + # Set memtop: + # (a) to 0 if there's an interrupt pending, + # (b) to heap_end if GC is disabled, or + # (c) to heap_alloc_limit if GC is enabled. + # Forward branches for (a) or (b) so they are statically + # predicted not-taken. + OP(test,q) TW(QOF(REGBLOCK_INT_CODE(),regs),REG(rdx)) + jnz set_interrupt_enables_pending_interrupt + OP(test,q) TW(IMM(INT_GC),REG(rdx)) + jz set_interrupt_enables_no_gc + OP(mov,q) TW(ABS(EVR(heap_alloc_limit)),REG(rcx)) +define_debugging_label(set_interrupt_enables_memtop) + OP(mov,q) TW(REG(rcx),QOF(REGBLOCK_MEMTOP(),regs)) + +define_debugging_label(set_interrupt_enables_determine_stack_guard) + # Set stack guard register: + # (a) to stack_start, if stack overflow interrupt is blocked, or + # (b) to stack_guard, if stack overflow interrupt is allowed. + OP(test,q) TW(IMM(INT_Stack_Overflow),REG(rdx)) + jz set_interrupt_enables_no_stackoverflow + OP(mov,q) TW(ABS(EVR(stack_guard)),REG(rcx)) +define_debugging_label(set_interrupt_enables_stack_guard) + OP(mov,q) TW(REG(rcx),QOF(REGBLOCK_STACK_GUARD(),regs)) + + # All set! + ret + +define_debugging_label(set_interrupt_enables_pending_interrupt) + OP(mov,q) TW(ABS(EVR(memory_block_start)),REG(rcx)) + jmp set_interrupt_enables_memtop + +define_debugging_label(set_interrupt_enables_no_gc) + OP(mov,q) TW(ABS(EVR(heap_end)),REG(rcx)) + jmp set_interrupt_enables_memtop + +define_debugging_label(set_interrupt_enables_no_stackoverflow) + OP(mov,q) TW(ABS(EVR(stack_start)),REG(rcx)) + jmp set_interrupt_enables_stack_guard + ### The following code is used by generic arithmetic ### whether the fixnum case is open-coded in line or not. diff --git a/src/microcode/cmpint.c b/src/microcode/cmpint.c index cd5cedab8..78f69c739 100644 --- a/src/microcode/cmpint.c +++ b/src/microcode/cmpint.c @@ -67,8 +67,9 @@ typedef struct unsigned long n_linked_entries; } link_cc_state_t; -/* Ways to bypass the interpreter. Matches - code/special-compiled/... in runtime/conpar.scm. */ +/* Ways to bypass the interpreter. Sync with: + - runtime/conpar.scm, code/special-compiled/... + - compiler/machines/x86-64/rules3.scm, reflect-code:... */ typedef enum { REFLECT_CODE_INTERNAL_APPLY, diff --git a/src/microcode/cmpintmd/x86-64.c b/src/microcode/cmpintmd/x86-64.c index 195d75d09..84b9f6214 100644 --- a/src/microcode/cmpintmd/x86-64.c +++ b/src/microcode/cmpintmd/x86-64.c @@ -379,6 +379,8 @@ x86_64_reset_hook (void) SETUP_REGISTER (asm_apply_setup_size_7); /* 48 */ SETUP_REGISTER (asm_apply_setup_size_8); /* 49 */ + SETUP_REGISTER (asm_set_interrupt_enables); /* 50 */ + #ifdef _MACH_UNIX { vm_address_t addr; diff --git a/src/microcode/cmpintmd/x86-64.h b/src/microcode/cmpintmd/x86-64.h index 1d775e783..1c89e6caf 100644 --- a/src/microcode/cmpintmd/x86-64.h +++ b/src/microcode/cmpintmd/x86-64.h @@ -271,6 +271,7 @@ extern void asm_apply_setup_size_5 (void); extern void asm_apply_setup_size_6 (void); extern void asm_apply_setup_size_7 (void); extern void asm_apply_setup_size_8 (void); +extern void asm_set_interrupt_enables (void); extern void asm_scheme_to_interface (void); extern void asm_scheme_to_interface_call (void); extern void asm_serialize_cache (void); -- 2.25.1